From bdde10137b3e67383f38329b02b329a906b78d5d Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Sun, 28 Aug 2016 14:36:06 -0400 Subject: [PATCH] hash/crc32: cleanup code and improve tests Major reorganization of the crc32 code: - The arch-specific files now implement a well-defined interface (documented in crc32.go). They no longer have the responsibility of initializing and falling back to a non-accelerated implementation; instead, that happens in the higher level code. - The non-accelerated algorithms are moved to a separate file with no dependencies on other code. - The "cutoff" optimization for slicing-by-8 is moved inside the algorithm itself (as opposed to every callsite). Tests are significantly improved: - direct tests for the non-accelerated algorithms. - "cross-check" tests for arch-specific implementations (all archs). - tests for misaligned buffers for both IEEE and Castagnoli. Fixes #16909. Change-Id: I9b6dd83b7a57cd615eae901c0a6d61c6b8091c74 Reviewed-on: https://go-review.googlesource.com/27935 Reviewed-by: Keith Randall --- src/hash/crc32/crc32.go | 167 +++++++++++++++-------------- src/hash/crc32/crc32_amd64.go | 60 ++++++----- src/hash/crc32/crc32_amd64_test.go | 44 -------- src/hash/crc32/crc32_amd64p32.go | 36 +++---- src/hash/crc32/crc32_generic.go | 92 ++++++++++++---- src/hash/crc32/crc32_otherarch.go | 15 +++ src/hash/crc32/crc32_s390x.go | 83 +++++++------- src/hash/crc32/crc32_test.go | 164 +++++++++++++++++++++++----- 8 files changed, 407 insertions(+), 254 deletions(-) delete mode 100644 src/hash/crc32/crc32_amd64_test.go create mode 100644 src/hash/crc32/crc32_otherarch.go diff --git a/src/hash/crc32/crc32.go b/src/hash/crc32/crc32.go index 6eed8ff300..8aa91b17e9 100644 --- a/src/hash/crc32/crc32.go +++ b/src/hash/crc32/crc32.go @@ -20,9 +20,6 @@ import ( // The size of a CRC-32 checksum in bytes. const Size = 4 -// Use "slice by 8" when payload >= this value. -const sliceBy8Cutoff = 16 - // Predefined polynomials. const ( // IEEE is by far and away the most common CRC-32 polynomial. @@ -43,80 +40,96 @@ const ( // Table is a 256-word table representing the polynomial for efficient processing. type Table [256]uint32 +// This file makes use of functions implemented in architecture-specific files. +// The interface that they implement is as follows: +// +// // archAvailableIEEE reports whether an architecture-specific CRC32-IEEE +// // algorithm is available. +// archAvailableIEEE() bool +// +// // archInitIEEE initializes the architecture-specific CRC3-IEEE algorithm. +// // It can only be called if archAvailableIEEE() returns true. +// archInitIEEE() +// +// // archUpdateIEEE updates the given CRC32-IEEE. It can only be called if +// // archInitIEEE() was previously called. +// archUpdateIEEE(crc uint32, p []byte) uint32 +// +// // archAvailableCastagnoli reports whether an architecture-specific +// // CRC32-C algorithm is available. +// archAvailableCastagnoli() bool +// +// // archInitCastagnoli initializes the architecture-specific CRC32-C +// // algorithm. It can only be called if archAvailableCastagnoli() returns +// // true. +// archInitCastagnoli() +// +// // archUpdateCastagnoli updates the given CRC32-C. It can only be called +// // if archInitCastagnoli() was previously called. +// archUpdateCastagnoli(crc uint32, p []byte) uint32 + // castagnoliTable points to a lazily initialized Table for the Castagnoli // polynomial. MakeTable will always return this value when asked to make a // Castagnoli table so we can compare against it to find when the caller is // using this polynomial. var castagnoliTable *Table var castagnoliTable8 *slicing8Table +var castagnoliArchImpl bool +var updateCastagnoli func(crc uint32, p []byte) uint32 var castagnoliOnce sync.Once func castagnoliInit() { - // Call the arch-specific init function and let it decide if we will need - // the tables for the generic implementation. - needGenericTables := castagnoliInitArch() - - if needGenericTables { - castagnoliTable8 = makeTable8(Castagnoli) + castagnoliTable = simpleMakeTable(Castagnoli) + castagnoliArchImpl = archAvailableCastagnoli() + + if castagnoliArchImpl { + archInitCastagnoli() + updateCastagnoli = archUpdateCastagnoli + } else { + // Initialize the slicing-by-8 table. + castagnoliTable8 = slicingMakeTable(Castagnoli) + updateCastagnoli = func(crc uint32, p []byte) uint32 { + return slicingUpdate(crc, castagnoliTable8, p) + } } - - // Even if we don't need the contents of this table, we use it as a handle - // returned by MakeTable. We should find a way to clean this up (see #16909). - castagnoliTable = makeTable(Castagnoli) } // IEEETable is the table for the IEEE polynomial. -var IEEETable = makeTable(IEEE) - -// slicing8Table is array of 8 Tables -type slicing8Table [8]Table +var IEEETable = simpleMakeTable(IEEE) // ieeeTable8 is the slicing8Table for IEEE var ieeeTable8 *slicing8Table -var ieeeTable8Once sync.Once +var ieeeArchImpl bool +var updateIEEE func(crc uint32, p []byte) uint32 +var ieeeOnce sync.Once + +func ieeeInit() { + ieeeArchImpl = archAvailableIEEE() + + if ieeeArchImpl { + archInitIEEE() + updateIEEE = archUpdateIEEE + } else { + // Initialize the slicing-by-8 table. + ieeeTable8 = slicingMakeTable(IEEE) + updateIEEE = func(crc uint32, p []byte) uint32 { + return slicingUpdate(crc, ieeeTable8, p) + } + } +} // MakeTable returns a Table constructed from the specified polynomial. // The contents of this Table must not be modified. func MakeTable(poly uint32) *Table { switch poly { case IEEE: + ieeeOnce.Do(ieeeInit) return IEEETable case Castagnoli: castagnoliOnce.Do(castagnoliInit) return castagnoliTable } - return makeTable(poly) -} - -// makeTable returns the Table constructed from the specified polynomial. -func makeTable(poly uint32) *Table { - t := new(Table) - for i := 0; i < 256; i++ { - crc := uint32(i) - for j := 0; j < 8; j++ { - if crc&1 == 1 { - crc = (crc >> 1) ^ poly - } else { - crc >>= 1 - } - } - t[i] = crc - } - return t -} - -// makeTable8 returns slicing8Table constructed from the specified polynomial. -func makeTable8(poly uint32) *slicing8Table { - t := new(slicing8Table) - t[0] = *makeTable(poly) - for i := 0; i < 256; i++ { - crc := t[0][i] - for j := 1; j < 8; j++ { - crc = t[0][crc&0xFF] ^ (crc >> 8) - t[j][i] = crc - } - } - return t + return simpleMakeTable(poly) } // digest represents the partial evaluation of a checksum. @@ -128,7 +141,12 @@ type digest struct { // New creates a new hash.Hash32 computing the CRC-32 checksum // using the polynomial represented by the Table. // Its Sum method will lay the value out in big-endian byte order. -func New(tab *Table) hash.Hash32 { return &digest{0, tab} } +func New(tab *Table) hash.Hash32 { + if tab == IEEETable { + ieeeOnce.Do(ieeeInit) + } + return &digest{0, tab} +} // NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum // using the IEEE polynomial. @@ -141,44 +159,32 @@ func (d *digest) BlockSize() int { return 1 } func (d *digest) Reset() { d.crc = 0 } -func update(crc uint32, tab *Table, p []byte) uint32 { - crc = ^crc - for _, v := range p { - crc = tab[byte(crc)^v] ^ (crc >> 8) - } - return ^crc -} - -// updateSlicingBy8 updates CRC using Slicing-by-8 -func updateSlicingBy8(crc uint32, tab *slicing8Table, p []byte) uint32 { - crc = ^crc - for len(p) > 8 { - crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 - crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^ - tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^ - tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF] - p = p[8:] - } - crc = ^crc - if len(p) == 0 { - return crc - } - return update(crc, &tab[0], p) -} - // Update returns the result of adding the bytes in p to the crc. func Update(crc uint32, tab *Table, p []byte) uint32 { switch tab { case castagnoliTable: return updateCastagnoli(crc, p) case IEEETable: + // Unfortunately, because IEEETable is exported, IEEE may be used without a + // call to MakeTable. We have to make sure it gets initialized in that case. + ieeeOnce.Do(ieeeInit) return updateIEEE(crc, p) + default: + return simpleUpdate(crc, tab, p) } - return update(crc, tab, p) } func (d *digest) Write(p []byte) (n int, err error) { - d.crc = Update(d.crc, d.tab, p) + switch d.tab { + case castagnoliTable: + d.crc = updateCastagnoli(d.crc, p) + case IEEETable: + // We only create digest objects through New() which takes care of + // initialization in this case. + d.crc = updateIEEE(d.crc, p) + default: + d.crc = simpleUpdate(d.crc, d.tab, p) + } return len(p), nil } @@ -195,4 +201,7 @@ func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) } // ChecksumIEEE returns the CRC-32 checksum of data // using the IEEE polynomial. -func ChecksumIEEE(data []byte) uint32 { return updateIEEE(0, data) } +func ChecksumIEEE(data []byte) uint32 { + ieeeOnce.Do(ieeeInit) + return updateIEEE(0, data) +} diff --git a/src/hash/crc32/crc32_amd64.go b/src/hash/crc32/crc32_amd64.go index a071cbcb88..369a436be9 100644 --- a/src/hash/crc32/crc32_amd64.go +++ b/src/hash/crc32/crc32_amd64.go @@ -2,6 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// AMD64-specific hardware-assisted CRC32 algorithms. See crc32.go for a +// description of the interface that each architecture-specific file +// implements. + package crc32 import "unsafe" @@ -45,9 +49,13 @@ type sse42Table [4]Table var castagnoliSSE42TableK1 *sse42Table var castagnoliSSE42TableK2 *sse42Table -func castagnoliInitArch() (needGenericTables bool) { +func archAvailableCastagnoli() bool { + return sse42 +} + +func archInitCastagnoli() { if !sse42 { - return true + panic("arch-specific Castagnoli not available") } castagnoliSSE42TableK1 = new(sse42Table) castagnoliSSE42TableK2 = new(sse42Table) @@ -65,7 +73,6 @@ func castagnoliInitArch() (needGenericTables bool) { castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:]) } } - return false } // castagnoliShift computes the CRC32-C of K1 or K2 zeroes (depending on the @@ -78,13 +85,9 @@ func castagnoliShift(table *sse42Table, crc uint32) uint32 { table[0][crc&0xFF] } -func updateCastagnoli(crc uint32, p []byte) uint32 { +func archUpdateCastagnoli(crc uint32, p []byte) uint32 { if !sse42 { - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - return updateSlicingBy8(crc, castagnoliTable8, p) - } - return update(crc, castagnoliTable, p) + panic("not available") } // This method is inspired from the algorithm in Intel's white paper: @@ -193,24 +196,33 @@ func updateCastagnoli(crc uint32, p []byte) uint32 { return ^crc } -func updateIEEE(crc uint32, p []byte) uint32 { - if useFastIEEE && len(p) >= 64 { +func archAvailableIEEE() bool { + return useFastIEEE +} + +var archIeeeTable8 *slicing8Table + +func archInitIEEE() { + if !useFastIEEE { + panic("not available") + } + // We still use slicing-by-8 for small buffers. + archIeeeTable8 = slicingMakeTable(IEEE) +} + +func archUpdateIEEE(crc uint32, p []byte) uint32 { + if !useFastIEEE { + panic("not available") + } + + if len(p) >= 64 { left := len(p) & 15 do := len(p) - left crc = ^ieeeCLMUL(^crc, p[:do]) - if left > 0 { - crc = update(crc, IEEETable, p[do:]) - } - return crc + p = p[do:] } - - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - ieeeTable8Once.Do(func() { - ieeeTable8 = makeTable8(IEEE) - }) - return updateSlicingBy8(crc, ieeeTable8, p) + if len(p) == 0 { + return crc } - - return update(crc, IEEETable, p) + return slicingUpdate(crc, archIeeeTable8, p) } diff --git a/src/hash/crc32/crc32_amd64_test.go b/src/hash/crc32/crc32_amd64_test.go deleted file mode 100644 index e136f788d6..0000000000 --- a/src/hash/crc32/crc32_amd64_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package crc32 - -import ( - "math/rand" - "testing" -) - -func TestCastagnoliSSE42(t *testing.T) { - if !sse42 { - t.Skip("SSE42 not supported") - } - - // Init the SSE42 tables. - castagnoliOnce.Do(castagnoliInit) - - // Generate a table to use with the non-SSE version. - slicingTable := makeTable8(Castagnoli) - - // The optimized SSE4.2 implementation behaves differently for different - // lengths (especially around multiples of K*3). Crosscheck against the - // software implementation for various lengths. - for _, base := range []int{castagnoliK1, castagnoliK2, castagnoliK1 + castagnoliK2} { - for _, baseMult := range []int{2, 3, 5, 6, 9, 30} { - for _, variation := range []int{0, 1, 2, 3, 4, 7, 10, 16, 32, 50, 128} { - for _, varMult := range []int{-2, -1, +1, +2} { - length := base*baseMult + variation*varMult - p := make([]byte, length) - _, _ = rand.Read(p) - crcInit := uint32(rand.Int63()) - correct := updateSlicingBy8(crcInit, slicingTable, p) - result := updateCastagnoli(crcInit, p) - if result != correct { - t.Errorf("SSE42 implementation = 0x%x want 0x%x (buffer length %d)", - result, correct, len(p)) - } - } - } - } - } -} diff --git a/src/hash/crc32/crc32_amd64p32.go b/src/hash/crc32/crc32_amd64p32.go index 48d181f295..9d728fc8fe 100644 --- a/src/hash/crc32/crc32_amd64p32.go +++ b/src/hash/crc32/crc32_amd64p32.go @@ -11,37 +11,31 @@ package crc32 // support. func haveSSE42() bool -// castagnoliSSE42 is defined in crc32_amd64.s and uses the SSE4.2 CRC32 +// castagnoliSSE42 is defined in crc32_amd64p32.s and uses the SSE4.2 CRC32 // instruction. //go:noescape func castagnoliSSE42(crc uint32, p []byte) uint32 var sse42 = haveSSE42() -func castagnoliInitArch() (needGenericTables bool) { - // We only need the generic implementation tables if we don't have SSE4.2. - return !sse42 +func archAvailableCastagnoli() bool { + return sse42 } -func updateCastagnoli(crc uint32, p []byte) uint32 { - if sse42 { - return castagnoliSSE42(crc, p) +func archInitCastagnoli() { + if !sse42 { + panic("not available") } - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - return updateSlicingBy8(crc, castagnoliTable8, p) - } - return update(crc, castagnoliTable, p) + // No initialization necessary. } -func updateIEEE(crc uint32, p []byte) uint32 { - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - ieeeTable8Once.Do(func() { - ieeeTable8 = makeTable8(IEEE) - }) - return updateSlicingBy8(crc, ieeeTable8, p) +func archUpdateCastagnoli(crc uint32, p []byte) uint32 { + if !sse42 { + panic("not available") } - - return update(crc, IEEETable, p) + return castagnoliSSE42(crc, p) } + +func archAvailableIEEE() bool { return false } +func archInitIEEE() { panic("not available") } +func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") } diff --git a/src/hash/crc32/crc32_generic.go b/src/hash/crc32/crc32_generic.go index decf973066..abacbb663d 100644 --- a/src/hash/crc32/crc32_generic.go +++ b/src/hash/crc32/crc32_generic.go @@ -2,32 +2,88 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!amd64p32,!s390x +// This file contains CRC32 algorithms that are not specific to any architecture +// and don't use hardware acceleration. +// +// The simple (and slow) CRC32 implementation only uses a 256*4 bytes table. +// +// The slicing-by-8 algorithm is a faster implementation that uses a bigger +// table (8*256*4 bytes). package crc32 -// This file contains the generic version of updateCastagnoli which does -// slicing-by-8, or uses the fallback for very small sizes. +// simpleMakeTable allocates and constructs a Table for the specified +// polynomial. The table is suitable for use with the simple algorithm +// (simpleUpdate). +func simpleMakeTable(poly uint32) *Table { + t := new(Table) + simplePopulateTable(poly, t) + return t +} + +// simplePopulateTable constructs a Table for the specified polynomial, suitable +// for use with simpleUpdate. +func simplePopulateTable(poly uint32, t *Table) { + for i := 0; i < 256; i++ { + crc := uint32(i) + for j := 0; j < 8; j++ { + if crc&1 == 1 { + crc = (crc >> 1) ^ poly + } else { + crc >>= 1 + } + } + t[i] = crc + } +} -func castagnoliInitArch() (needGenericTables bool) { - return true +// simpleUpdate uses the simple algorithm to update the CRC, given a table that +// was previously computed using simpleMakeTable. +func simpleUpdate(crc uint32, tab *Table, p []byte) uint32 { + crc = ^crc + for _, v := range p { + crc = tab[byte(crc)^v] ^ (crc >> 8) + } + return ^crc } -func updateCastagnoli(crc uint32, p []byte) uint32 { - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - return updateSlicingBy8(crc, castagnoliTable8, p) +// Use slicing-by-8 when payload >= this value. +const slicing8Cutoff = 16 + +// slicing8Table is array of 8 Tables, used by the slicing-by-8 algorithm. +type slicing8Table [8]Table + +// slicingMakeTable constructs a slicing8Table for the specified polynomial. The +// table is suitable for use with the slicing-by-8 algorithm (slicingUpdate). +func slicingMakeTable(poly uint32) *slicing8Table { + t := new(slicing8Table) + simplePopulateTable(poly, &t[0]) + for i := 0; i < 256; i++ { + crc := t[0][i] + for j := 1; j < 8; j++ { + crc = t[0][crc&0xFF] ^ (crc >> 8) + t[j][i] = crc + } } - return update(crc, castagnoliTable, p) + return t } -func updateIEEE(crc uint32, p []byte) uint32 { - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - ieeeTable8Once.Do(func() { - ieeeTable8 = makeTable8(IEEE) - }) - return updateSlicingBy8(crc, ieeeTable8, p) +// slicingUpdate uses the slicing-by-8 algorithm to update the CRC, given a +// table that was previously computed using slicingMakeTable. +func slicingUpdate(crc uint32, tab *slicing8Table, p []byte) uint32 { + if len(p) >= slicing8Cutoff { + crc = ^crc + for len(p) > 8 { + crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 + crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^ + tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^ + tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF] + p = p[8:] + } + crc = ^crc + } + if len(p) == 0 { + return crc } - return update(crc, IEEETable, p) + return simpleUpdate(crc, &tab[0], p) } diff --git a/src/hash/crc32/crc32_otherarch.go b/src/hash/crc32/crc32_otherarch.go new file mode 100644 index 0000000000..cc960764bc --- /dev/null +++ b/src/hash/crc32/crc32_otherarch.go @@ -0,0 +1,15 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!amd64p32,!s390x + +package crc32 + +func archAvailableIEEE() bool { return false } +func archInitIEEE() { panic("not available") } +func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") } + +func archAvailableCastagnoli() bool { return false } +func archInitCastagnoli() { panic("not available") } +func archUpdateCastagnoli(crc uint32, p []byte) uint32 { panic("not available") } diff --git a/src/hash/crc32/crc32_s390x.go b/src/hash/crc32/crc32_s390x.go index 72d2648280..d13000d058 100644 --- a/src/hash/crc32/crc32_s390x.go +++ b/src/hash/crc32/crc32_s390x.go @@ -25,62 +25,65 @@ func vectorizedCastagnoli(crc uint32, p []byte) uint32 //go:noescape func vectorizedIEEE(crc uint32, p []byte) uint32 -func castagnoliInitArch() (needGenericTables bool) { - return true +func archAvailableCastagnoli() bool { + return hasVX } -func genericCastagnoli(crc uint32, p []byte) uint32 { - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - return updateSlicingBy8(crc, castagnoliTable8, p) - } - return update(crc, castagnoliTable, p) -} +var archCastagnoliTable8 *slicing8Table -func genericIEEE(crc uint32, p []byte) uint32 { - // Use slicing-by-8 on larger inputs. - if len(p) >= sliceBy8Cutoff { - ieeeTable8Once.Do(func() { - ieeeTable8 = makeTable8(IEEE) - }) - return updateSlicingBy8(crc, ieeeTable8, p) +func archInitCastagnoli() { + if !hasVX { + panic("not available") } - return update(crc, IEEETable, p) + // We still use slicing-by-8 for small buffers. + archCastagnoliTable8 = slicingMakeTable(Castagnoli) } -// updateCastagnoli calculates the checksum of p using -// vectorizedCastagnoli if possible and falling back onto -// genericCastagnoli as needed. -func updateCastagnoli(crc uint32, p []byte) uint32 { - // Use vectorized function if vector facility is available and - // data length is above threshold. - if hasVX && len(p) >= vxMinLen { +// archUpdateCastagnoli calculates the checksum of p using +// vectorizedCastagnoli. +func archUpdateCastagnoli(crc uint32, p []byte) uint32 { + if !hasVX { + panic("not available") + } + // Use vectorized function if data length is above threshold. + if len(p) >= vxMinLen { aligned := len(p) & ^vxAlignMask crc = vectorizedCastagnoli(crc, p[:aligned]) p = p[aligned:] - // process remaining data - if len(p) > 0 { - crc = genericCastagnoli(crc, p) - } + } + if len(p) == 0 { return crc } - return genericCastagnoli(crc, p) + return slicingUpdate(crc, archCastagnoliTable8, p) +} + +func archAvailableIEEE() bool { + return hasVX +} + +var archIeeeTable8 *slicing8Table + +func archInitIEEE() { + if !hasVX { + panic("not available") + } + // We still use slicing-by-8 for small buffers. + archIeeeTable8 = slicingMakeTable(IEEE) } -// updateIEEE calculates the checksum of p using vectorizedIEEE if -// possible and falling back onto genericIEEE as needed. -func updateIEEE(crc uint32, p []byte) uint32 { - // Use vectorized function if vector facility is available and - // data length is above threshold. - if hasVX && len(p) >= vxMinLen { +// archUpdateIEEE calculates the checksum of p using vectorizedIEEE. +func archUpdateIEEE(crc uint32, p []byte) uint32 { + if !hasVX { + panic("not available") + } + // Use vectorized function if data length is above threshold. + if len(p) >= vxMinLen { aligned := len(p) & ^vxAlignMask crc = vectorizedIEEE(crc, p[:aligned]) p = p[aligned:] - // process remaining data - if len(p) > 0 { - crc = genericIEEE(crc, p) - } + } + if len(p) == 0 { return crc } - return genericIEEE(crc, p) + return slicingUpdate(crc, archIeeeTable8, p) } diff --git a/src/hash/crc32/crc32_test.go b/src/hash/crc32/crc32_test.go index 7f7f0a2f74..1356734d50 100644 --- a/src/hash/crc32/crc32_test.go +++ b/src/hash/crc32/crc32_test.go @@ -6,7 +6,7 @@ package crc32 import ( "hash" - "io" + "math/rand" "testing" ) @@ -49,42 +49,150 @@ var golden = []test{ {0x8e0bb443, 0xdcded527, "How can you write a big system without C++? -Paul Glick"}, } +// testGoldenIEEE verifies that the given function returns +// correct IEEE checksums. +func testGoldenIEEE(t *testing.T, crcFunc func(b []byte) uint32) { + for _, g := range golden { + if crc := crcFunc([]byte(g.in)); crc != g.ieee { + t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, crc, g.ieee) + } + } +} + +// testGoldenCastagnoli verifies that the given function returns +// correct IEEE checksums. +func testGoldenCastagnoli(t *testing.T, crcFunc func(b []byte) uint32) { + for _, g := range golden { + if crc := crcFunc([]byte(g.in)); crc != g.castagnoli { + t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, crc, g.castagnoli) + } + } +} + +// testCrossCheck generates random buffers of various lengths and verifies that +// the two "update" functions return the same result. +func testCrossCheck(t *testing.T, crcFunc1, crcFunc2 func(crc uint32, b []byte) uint32) { + // The AMD64 implementation has some cutoffs at lengths 168*3=504 and + // 1344*3=4032. We should make sure lengths around these values are in the + // list. + lengths := []int{0, 1, 2, 3, 4, 5, 10, 16, 50, 100, 128, + 500, 501, 502, 503, 504, 505, 512, 1000, 1024, 2000, + 4030, 4031, 4032, 4033, 4036, 4040, 4048, 4096, 5000, 10000} + for _, length := range lengths { + p := make([]byte, length) + _, _ = rand.Read(p) + crcInit := uint32(rand.Int63()) + crc1 := crcFunc1(crcInit, p) + crc2 := crcFunc2(crcInit, p) + if crc1 != crc2 { + t.Errorf("mismatch: 0x%x vs 0x%x (buffer length %d)", crc1, crc2, length) + } + } +} + +// TestSimple tests the simple generic algorithm. +func TestSimple(t *testing.T) { + tab := simpleMakeTable(IEEE) + testGoldenIEEE(t, func(b []byte) uint32 { + return simpleUpdate(0, tab, b) + }) + + tab = simpleMakeTable(Castagnoli) + testGoldenCastagnoli(t, func(b []byte) uint32 { + return simpleUpdate(0, tab, b) + }) +} + +// TestSimple tests the slicing-by-8 algorithm. +func TestSlicing(t *testing.T) { + tab := slicingMakeTable(IEEE) + testGoldenIEEE(t, func(b []byte) uint32 { + return slicingUpdate(0, tab, b) + }) + + tab = slicingMakeTable(Castagnoli) + testGoldenCastagnoli(t, func(b []byte) uint32 { + return slicingUpdate(0, tab, b) + }) + + // Cross-check various polys against the simple algorithm. + for _, poly := range []uint32{IEEE, Castagnoli, Koopman, 0xD5828281} { + t1 := simpleMakeTable(poly) + f1 := func(crc uint32, b []byte) uint32 { + return simpleUpdate(crc, t1, b) + } + t2 := slicingMakeTable(poly) + f2 := func(crc uint32, b []byte) uint32 { + return slicingUpdate(crc, t2, b) + } + testCrossCheck(t, f1, f2) + } +} + +func TestArchIEEE(t *testing.T) { + if !archAvailableIEEE() { + t.Skip("Arch-specific IEEE not available.") + } + archInitIEEE() + slicingTable := slicingMakeTable(IEEE) + testCrossCheck(t, archUpdateIEEE, func(crc uint32, b []byte) uint32 { + return slicingUpdate(crc, slicingTable, b) + }) +} + +func TestArchCastagnoli(t *testing.T) { + if !archAvailableCastagnoli() { + t.Skip("Arch-specific Castagnoli not available.") + } + archInitCastagnoli() + slicingTable := slicingMakeTable(Castagnoli) + testCrossCheck(t, archUpdateCastagnoli, func(crc uint32, b []byte) uint32 { + return slicingUpdate(crc, slicingTable, b) + }) +} + func TestGolden(t *testing.T) { + testGoldenIEEE(t, ChecksumIEEE) + + // Some implementations have special code to deal with misaligned + // data; test that as well. + for delta := 1; delta <= 7; delta++ { + testGoldenIEEE(t, func(b []byte) uint32 { + ieee := NewIEEE() + d := delta + if d >= len(b) { + d = len(b) + } + ieee.Write(b[:d]) + ieee.Write(b[d:]) + return ieee.Sum32() + }) + } + castagnoliTab := MakeTable(Castagnoli) if castagnoliTab == nil { t.Errorf("nil Castagnoli Table") } - for _, g := range golden { - ieee := NewIEEE() - io.WriteString(ieee, g.in) - s := ieee.Sum32() - if s != g.ieee { - t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, s, g.ieee) - } - + testGoldenCastagnoli(t, func(b []byte) uint32 { castagnoli := New(castagnoliTab) - io.WriteString(castagnoli, g.in) - s = castagnoli.Sum32() - if s != g.castagnoli { - t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, s, g.castagnoli) - } + castagnoli.Write(b) + return castagnoli.Sum32() + }) - // The SSE4.2 implementation of this has code to deal - // with misaligned data so we ensure that we test that - // too. - for delta := 1; delta <= 7; delta++ { - if len(g.in) > delta { - in := []byte(g.in) - castagnoli = New(castagnoliTab) - castagnoli.Write(in[:delta]) - castagnoli.Write(in[delta:]) - s = castagnoli.Sum32() - if s != g.castagnoli { - t.Errorf("Castagnoli[misaligned](%s) = 0x%x want 0x%x", g.in, s, g.castagnoli) - } + // Some implementations have special code to deal with misaligned + // data; test that as well. + for delta := 1; delta <= 7; delta++ { + testGoldenCastagnoli(t, func(b []byte) uint32 { + castagnoli := New(castagnoliTab) + d := delta + if d >= len(b) { + d = len(b) } - } + castagnoli.Write(b[:d]) + castagnoli.Write(b[d:]) + return castagnoli.Sum32() + }) } } -- 2.48.1