struct
{
char *name;
+ /*
+ * type is the lexical type to return. It dictates what kind of
+ * operands 6a allows to follow it (in a.y) as the possible operand
+ * types are handled by a grammar. How do you know which LTYPE?
+ * Either read a.y or think of an instruction that has the same
+ * possible operands and look up what it takes.
+ */
ushort type;
ushort value;
} itab[] =
"UNPCKLPS", LTYPE3, AUNPCKLPS,
"XORPD", LTYPE3, AXORPD,
"XORPS", LTYPE3, AXORPS,
+ "CRC32B", LTYPE4, ACRC32B,
+ "CRC32Q", LTYPE4, ACRC32Q,
0
};
ASWAPGS,
AMODE,
+ ACRC32B,
+ ACRC32Q,
ALAST
};
Zxxx = 0,
Zlit,
+ Zlitm_r,
Z_rp,
Zbr,
Zcall,
Ymr, Yrl, Zm_r_xm, 1,
0
};
+uchar ycrc32l[] =
+{
+ Yml, Yrl, Zlitm_r, 0,
+};
+/*
+ * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
+ * and p->from and p->to as operands (Adr*). The linker scans optab to find
+ * the entry with the given p->as and then looks through the ytable for that
+ * instruction (the second field in the optab struct) for a line whose first
+ * two values match the Ytypes of the p->from and p->to operands. The function
+ * oclass in span.c computes the specific Ytype of an operand and then the set
+ * of more general Ytypes that it satisfies is implied by the ycover table, set
+ * up in instinit. For example, oclass distinguishes the constants 0 and 1
+ * from the more general 8-bit constants, but instinit says
+ *
+ * ycover[Yi0*Ymax + Ys32] = 1;
+ * ycover[Yi1*Ymax + Ys32] = 1;
+ * ycover[Yi8*Ymax + Ys32] = 1;
+ *
+ * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
+ * if that's what an instruction can handle.
+ *
+ * In parallel with the scan through the ytable for the appropriate line, there
+ * is a z pointer that starts out pointing at the strange magic byte list in
+ * the Optab struct. With each step past a non-matching ytable line, z
+ * advances by the 4th entry in the line. When a matching line is found, that
+ * z pointer has the extra data to use in laying down the instruction bytes.
+ * The actual bytes laid down are a function of the 3rd entry in the line (that
+ * is, the Ztype) and the z bytes.
+ *
+ * For example, let's look at AADDL. The optab line says:
+ * { AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
+ *
+ * and yaddl says
+ * uchar yaddl[] =
+ * {
+ * Yi8, Yml, Zibo_m, 2,
+ * Yi32, Yax, Zil_, 1,
+ * Yi32, Yml, Zilo_m, 2,
+ * Yrl, Yml, Zr_m, 1,
+ * Yml, Yrl, Zm_r, 1,
+ * 0
+ * };
+ *
+ * so there are 5 possible types of ADDL instruction that can be laid down, and
+ * possible states used to lay them down (Ztype and z pointer, assuming z
+ * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
+ *
+ * Yi8, Yml -> Zibo_m, z (0x83, 00)
+ * Yi32, Yax -> Zil_, z+2 (0x05)
+ * Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
+ * Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
+ * Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
+ *
+ * The Pconstant in the optab line controls the prefix bytes to emit. That's
+ * relatively straightforward as this program goes.
+ *
+ * The switch on t[2] in doasm implements the various Z cases. Zibo_m, for
+ * example, is an opcode byte (z[0]) then an asmando (which is some kind of
+ * encoded addressing mode for the Yml arg), and then a single immediate byte.
+ * Zilo_m is the same but a long (32-bit) immediate.
+ */
Optab optab[] =
/* as, ytab, andproto, opcode */
{
{ AXADDQ, yrl_ml, Pw, 0x0f,0xc1 },
{ AXADDW, yrl_ml, Pe, 0x0f,0xc1 },
+ { ACRC32B, ycrc32l,Px, 0xf2,0x0f,0x38,0xf0,0},
+ { ACRC32Q, ycrc32l,Pw, 0xf2,0x0f,0x38,0xf1,0},
+
{ AEND },
0
};
*andptr++ = op;
break;
+ case Zlitm_r:
+ for(; op = o->op[z]; z++)
+ *andptr++ = op;
+ asmand(&p->from, &p->to);
+ break;
+
case Zmb_r:
bytereg(&p->from, &p->ft);
/* fall through */
include ../../../Make.inc
TARG=hash/crc32
+
+ifeq ($(GOARCH), amd64)
+ ARCH_GOFILES=crc32_amd64.go
+ OFILES=crc32_amd64.6
+else
+ ARCH_GOFILES=crc32_generic.go
+endif
+
GOFILES=\
crc32.go\
+ $(ARCH_GOFILES)
include ../../../Make.pkg
import (
"hash"
"os"
+ "sync"
)
// The size of a CRC-32 checksum in bytes.
// Table is a 256-word table representing the polynomial for efficient processing.
type Table [256]uint32
+// castagnoliTable points to a lazily initialized Table for the Castagnoli
+// polynomial. MakeTable will always return this value when asked to make a
+// Castagnoli table so we can compare against it to find when the caller is
+// using this polynomial.
+var castagnoliTable *Table
+var castagnoliOnce sync.Once
+
+func castagnoliInit() {
+ castagnoliTable = makeTable(Castagnoli)
+}
+
+// IEEETable is the table for the IEEE polynomial.
+var IEEETable = makeTable(IEEE)
+
// MakeTable returns the Table constructed from the specified polynomial.
func MakeTable(poly uint32) *Table {
+ switch poly {
+ case IEEE:
+ return IEEETable
+ case Castagnoli:
+ castagnoliOnce.Do(castagnoliInit)
+ return castagnoliTable
+ }
+ return makeTable(poly)
+}
+
+// makeTable returns the Table constructed from the specified polynomial.
+func makeTable(poly uint32) *Table {
t := new(Table)
for i := 0; i < 256; i++ {
crc := uint32(i)
return t
}
-// IEEETable is the table for the IEEE polynomial.
-var IEEETable = MakeTable(IEEE)
-
// digest represents the partial evaluation of a checksum.
type digest struct {
crc uint32
// Update returns the result of adding the bytes in p to the crc.
func Update(crc uint32, tab *Table, p []byte) uint32 {
+ if tab == castagnoliTable {
+ return updateCastagnoli(crc, p)
+ }
return update(crc, tab, p)
}
func (d *digest) Write(p []byte) (n int, err os.Error) {
- d.crc = update(d.crc, d.tab, p)
+ d.crc = Update(d.crc, d.tab, p)
return len(p), nil
}
// Checksum returns the CRC-32 checksum of data
// using the polynomial represented by the Table.
-func Checksum(data []byte, tab *Table) uint32 { return update(0, tab, data) }
+func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
// ChecksumIEEE returns the CRC-32 checksum of data
// using the IEEE polynomial.
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package crc32
+
+// This file contains the code to call the SSE 4.2 version of the Castagnoli
+// CRC.
+
+// haveSSE42 is defined in crc_amd64.s and uses CPUID to test for SSE 4.2
+// support.
+func haveSSE42() bool
+
+// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
+// instruction.
+func castagnoliSSE42(uint32, []byte) uint32
+
+var sse42 = haveSSE42()
+
+func updateCastagnoli(crc uint32, p []byte) uint32 {
+ if sse42 {
+ return castagnoliSSE42(crc, p)
+ }
+ return update(crc, castagnoliTable, p)
+}
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// func castagnoliSSE42(crc uint32, p []byte) uint32
+TEXT ·castagnoliSSE42(SB),7,$0
+ MOVL crc+0(FP), AX // CRC value
+ MOVQ p+8(FP), SI // data pointer
+ MOVL p+16(FP), CX // len(p)
+
+ NOTL AX
+
+ /* If there's less than 8 bytes to process, we do it byte-by-byte. */
+ CMPL CX, $8
+ JL cleanup
+
+ /* Process individual bytes until the input is 8-byte aligned. */
+startup:
+ MOVQ SI, BX
+ ANDQ $7, BX
+ JZ aligned
+
+ CRC32B (SI), AX
+ DECL CX
+ INCQ SI
+ JMP startup
+
+aligned:
+ /* The input is now 8-byte aligned and we can process 8-byte chunks. */
+ CMPL CX, $8
+ JL cleanup
+
+ CRC32Q (SI), AX
+ ADDQ $8, SI
+ SUBQ $8, CX
+ JMP aligned
+
+cleanup:
+ /* We may have some bytes left over that we process one at a time. */
+ CMPL CX, $0
+ JE done
+
+ CRC32B (SI), AX
+ INCQ SI
+ DECQ CX
+ JMP cleanup
+
+done:
+ NOTL AX
+ MOVL AX, ret+24(FP)
+ RET
+
+// func haveSSE42() bool
+TEXT ·haveSSE42(SB),7,$0
+ XORQ AX, AX
+ INCL AX
+ CPUID
+ SHRQ $20, CX
+ ANDQ $1, CX
+ MOVB CX, ret+0(FP)
+ RET
+
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package crc32
+
+// The file contains the generic version of updateCastagnoli which just calls
+// the software implementation.
+
+func updateCastagnoli(crc uint32, p []byte) uint32 {
+ return update(crc, castagnoliTable, p)
+}
)
type test struct {
- out uint32
- in string
+ ieee, castagnoli uint32
+ in string
}
var golden = []test{
- {0x0, ""},
- {0xe8b7be43, "a"},
- {0x9e83486d, "ab"},
- {0x352441c2, "abc"},
- {0xed82cd11, "abcd"},
- {0x8587d865, "abcde"},
- {0x4b8e39ef, "abcdef"},
- {0x312a6aa6, "abcdefg"},
- {0xaeef2a50, "abcdefgh"},
- {0x8da988af, "abcdefghi"},
- {0x3981703a, "abcdefghij"},
- {0x6b9cdfe7, "Discard medicine more than two years old."},
- {0xc90ef73f, "He who has a shady past knows that nice guys finish last."},
- {0xb902341f, "I wouldn't marry him with a ten foot pole."},
- {0x42080e8, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
- {0x154c6d11, "The days of the digital watch are numbered. -Tom Stoppard"},
- {0x4c418325, "Nepal premier won't resign."},
- {0x33955150, "For every action there is an equal and opposite government program."},
- {0x26216a4b, "His money is twice tainted: 'taint yours and 'taint mine."},
- {0x1abbe45e, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
- {0xc89a94f7, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
- {0xab3abe14, "size: a.out: bad magic"},
- {0xbab102b6, "The major problem is with sendmail. -Mark Horton"},
- {0x999149d7, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
- {0x6d52a33c, "If the enemy is within range, then so are you."},
- {0x90631e8d, "It's well we cannot hear the screams/That we create in others' dreams."},
- {0x78309130, "You remind me of a TV show, but that's all right: I watch it anyway."},
- {0x7d0a377f, "C is as portable as Stonehedge!!"},
- {0x8c79fd79, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
- {0xa20b7167, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
- {0x8e0bb443, "How can you write a big system without C++? -Paul Glick"},
+ {0x0, 0x0, ""},
+ {0xe8b7be43, 0xc1d04330, "a"},
+ {0x9e83486d, 0xe2a22936, "ab"},
+ {0x352441c2, 0x364b3fb7, "abc"},
+ {0xed82cd11, 0x92c80a31, "abcd"},
+ {0x8587d865, 0xc450d697, "abcde"},
+ {0x4b8e39ef, 0x53bceff1, "abcdef"},
+ {0x312a6aa6, 0xe627f441, "abcdefg"},
+ {0xaeef2a50, 0xa9421b7, "abcdefgh"},
+ {0x8da988af, 0x2ddc99fc, "abcdefghi"},
+ {0x3981703a, 0xe6599437, "abcdefghij"},
+ {0x6b9cdfe7, 0xb2cc01fe, "Discard medicine more than two years old."},
+ {0xc90ef73f, 0xe28207f, "He who has a shady past knows that nice guys finish last."},
+ {0xb902341f, 0xbe93f964, "I wouldn't marry him with a ten foot pole."},
+ {0x42080e8, 0x9e3be0c3, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
+ {0x154c6d11, 0xf505ef04, "The days of the digital watch are numbered. -Tom Stoppard"},
+ {0x4c418325, 0x85d3dc82, "Nepal premier won't resign."},
+ {0x33955150, 0xc5142380, "For every action there is an equal and opposite government program."},
+ {0x26216a4b, 0x75eb77dd, "His money is twice tainted: 'taint yours and 'taint mine."},
+ {0x1abbe45e, 0x91ebe9f7, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
+ {0xc89a94f7, 0xf0b1168e, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
+ {0xab3abe14, 0x572b74e2, "size: a.out: bad magic"},
+ {0xbab102b6, 0x8a58a6d5, "The major problem is with sendmail. -Mark Horton"},
+ {0x999149d7, 0x9c426c50, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
+ {0x6d52a33c, 0x735400a4, "If the enemy is within range, then so are you."},
+ {0x90631e8d, 0xbec49c95, "It's well we cannot hear the screams/That we create in others' dreams."},
+ {0x78309130, 0xa95a2079, "You remind me of a TV show, but that's all right: I watch it anyway."},
+ {0x7d0a377f, 0xde2e65c5, "C is as portable as Stonehedge!!"},
+ {0x8c79fd79, 0x297a88ed, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
+ {0xa20b7167, 0x66ed1d8b, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
+ {0x8e0bb443, 0xdcded527, "How can you write a big system without C++? -Paul Glick"},
}
func TestGolden(t *testing.T) {
- for i := 0; i < len(golden); i++ {
- g := golden[i]
- c := NewIEEE()
- io.WriteString(c, g.in)
- s := c.Sum32()
- if s != g.out {
- t.Errorf("crc32(%s) = 0x%x want 0x%x", g.in, s, g.out)
- t.FailNow()
+ castagnoliTab := MakeTable(Castagnoli)
+
+ for _, g := range golden {
+ ieee := NewIEEE()
+ io.WriteString(ieee, g.in)
+ s := ieee.Sum32()
+ if s != g.ieee {
+ t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, s, g.ieee)
+ }
+
+ castagnoli := New(castagnoliTab)
+ io.WriteString(castagnoli, g.in)
+ s = castagnoli.Sum32()
+ if s != g.castagnoli {
+ t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
+ }
+
+ if len(g.in) > 0 {
+ // The SSE4.2 implementation of this has code to deal
+ // with misaligned data so we ensure that we test that
+ // too.
+ castagnoli = New(castagnoliTab)
+ io.WriteString(castagnoli, g.in[:1])
+ io.WriteString(castagnoli, g.in[1:])
+ s = castagnoli.Sum32()
+ if s != g.castagnoli {
+ t.Errorf("Castagnoli[misaligned](%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
+ }
}
}
}
}
c := NewIEEE()
b.StartTimer()
+ b.SetBytes(int64(len(data)))
for i := 0; i < b.N; i++ {
c.Write(data)