From: Russ Cox Date: Thu, 17 Sep 2009 01:14:18 +0000 (-0700) Subject: first step toward cgo tool. X-Git-Tag: weekly.2009-11-06~558 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0b7878a96ef3b4c8866401d5a94bc74caaeed370;p=gostls13.git first step toward cgo tool. can extract import "C" doc comment and all references to C package. ; cgo gmp.go | sort #include gmp.go:197:4: mpz_t as type gmp.go:205:2: mpz_init as call gmp.go:206:2: mpz_set as call gmp.go:221:2: mpz_init as call gmp.go:227:7: size_t as call gmp.go:228:2: mpz_export as call gmp.go:235:13: mpz_sizeinbase as call gmp.go:241:2: mpz_set as call gmp.go:252:3: mpz_import as call gmp.go:261:2: mpz_set_si as call gmp.go:273:5: mpz_set_str as call gmp.go:282:9: mpz_get_str as call gmp.go:287:3: mpz_clear as call gmp.go:302:2: mpz_add as call gmp.go:311:2: mpz_sub as call gmp.go:320:2: mpz_mul as call gmp.go:329:2: mpz_tdiv_q as call gmp.go:339:2: mpz_tdiv_r as call gmp.go:348:2: mpz_mul_2exp as call gmp.go:356:2: mpz_div_2exp as call gmp.go:367:3: mpz_pow as call gmp.go:369:3: mpz_powm as call gmp.go:378:2: mpz_neg as call gmp.go:386:2: mpz_abs as call gmp.go:404:9: mpz_cmp as call gmp.go:413:2: mpz_tdiv_qr as call gmp.go:426:2: mpz_gcdext as call ; R=r DELTA=746 (746 added, 0 deleted, 0 changed) OCL=34710 CL=34714 --- diff --git a/src/cmd/cgo/Makefile b/src/cmd/cgo/Makefile new file mode 100644 index 0000000000..8d9e11e539 --- /dev/null +++ b/src/cmd/cgo/Makefile @@ -0,0 +1,11 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include $(GOROOT)/src/Make.$(GOARCH) + +TARG=cgo +GOFILES=\ + cgo.go\ + +include $(GOROOT)/src/Make.cmd diff --git a/src/cmd/cgo/cgo.go b/src/cmd/cgo/cgo.go new file mode 100644 index 0000000000..f174b7854f --- /dev/null +++ b/src/cmd/cgo/cgo.go @@ -0,0 +1,308 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Cgo; see gmp.go for an overview. + +// TODO(rsc): +// Emit correct line number annotations. +// Make 6g understand the annotations. +package main + +import ( + "bufio"; + "container/vector"; + "debug/dwarf"; + "debug/elf"; + "flag"; + "fmt"; + "go/ast"; + "go/doc"; + "go/parser"; + "go/scanner"; + "go/token"; + "io"; + "os"; +) + +// Map of uses of C.xxx. The key is the pointer +// to the use (a pointer so it can be rewritten) +// and the value is the context ("call", "expr", "type"). +type cmap map[*ast.Expr] string + +var noPos token.Position + +func usage() { + fmt.Fprint(os.Stderr, "usage: cgo [options] file.cgo\n"); + flag.PrintDefaults(); +} + +func main() { + flag.Usage = usage; + flag.Parse(); + + args := flag.Args(); + if len(args) != 1 { + flag.Usage(); + } + filename := args[0]; + + prog, err := parser.ParsePkgFile("", filename, parser.ParseComments); + if err != nil { + fatal(err); + } + + // Find the import "C" line and get any extra C preamble. + preamble := ""; + found := false; + for _, d := range prog.Decls { + d, ok := d.(*ast.GenDecl); + if !ok { + continue; + } + for _, s := range d.Specs { + s, ok := s.(*ast.ImportSpec); + if !ok { + continue; + } + if len(s.Path) != 1 || string(s.Path[0].Value) != `"C"` { + continue; + } + found = true; + if s.Name != nil { + error(s.Path[0].Pos(), `cannot rename import "C"`); + } + if s.Doc != nil { + preamble += doc.CommentText(s.Doc) + "\n"; + } + else if len(d.Specs) == 1 && d.Doc != nil { + preamble += doc.CommentText(d.Doc) + "\n"; + } + } + } + if !found { + error(noPos, `cannot find import "C"`); + } + + // Accumulate pointers to uses of C.x. + m := make(cmap); + walk(prog, m, "prog"); + + fmt.Print(preamble); + for p, context := range m { + sel := (*p).(*ast.SelectorExpr); + fmt.Printf("%s: %s as %s\n", sel.Pos(), sel.Sel.Value, context); + } +} + +func walk(x interface{}, m cmap, context string) { + switch n := x.(type) { + case *ast.Expr: + if sel, ok := (*n).(*ast.SelectorExpr); ok { + // For now, assume that the only instance of capital C is + // when used as the imported package identifier. + // The parser should take care of scoping in the future, + // so that we will be able to distinguish a "top-level C" + // from a local C. + if l, ok := sel.X.(*ast.Ident); ok && l.Value == "C" { + m[n] = context; + break; + } + } + walk(*n, m, context); + + // everything else just recurs + default: + error(noPos, "unexpected type %T in walk", x); + panic(); + + case nil: + + // These are ordered and grouped to match ../../pkg/go/ast/ast.go + case *ast.Field: + walk(&n.Type, m, "type"); + case *ast.BadExpr: + case *ast.Ident: + case *ast.Ellipsis: + case *ast.BasicLit: + case *ast.StringList: + case *ast.FuncLit: + walk(n.Type, m, "type"); + walk(n.Body, m, "stmt"); + case *ast.CompositeLit: + walk(&n.Type, m, "type"); + walk(n.Elts, m, "expr"); + case *ast.ParenExpr: + walk(&n.X, m, context); + case *ast.SelectorExpr: + walk(&n.X, m, "selector"); + case *ast.IndexExpr: + walk(&n.X, m, "expr"); + walk(&n.Index, m, "expr"); + if n.End != nil { + walk(&n.End, m, "expr"); + } + case *ast.TypeAssertExpr: + walk(&n.X, m, "expr"); + walk(&n.Type, m, "type"); + case *ast.CallExpr: + walk(&n.Fun, m, "call"); + walk(n.Args, m, "expr"); + case *ast.StarExpr: + walk(&n.X, m, context); + case *ast.UnaryExpr: + walk(&n.X, m, "expr"); + case *ast.BinaryExpr: + walk(&n.X, m, "expr"); + walk(&n.Y, m, "expr"); + case *ast.KeyValueExpr: + walk(&n.Key, m, "expr"); + walk(&n.Value, m, "expr"); + + case *ast.ArrayType: + walk(&n.Len, m, "expr"); + walk(&n.Elt, m, "type"); + case *ast.StructType: + walk(n.Fields, m, "field"); + case *ast.FuncType: + walk(n.Params, m, "field"); + walk(n.Results, m, "field"); + case *ast.InterfaceType: + walk(n.Methods, m, "field"); + case *ast.MapType: + walk(&n.Key, m, "type"); + walk(&n.Value, m, "type"); + case *ast.ChanType: + walk(&n.Value, m, "type"); + + case *ast.BadStmt: + case *ast.DeclStmt: + walk(n.Decl, m, "decl"); + case *ast.EmptyStmt: + case *ast.LabeledStmt: + walk(n.Stmt, m, "stmt"); + case *ast.ExprStmt: + walk(&n.X, m, "expr"); + case *ast.IncDecStmt: + walk(&n.X, m, "expr"); + case *ast.AssignStmt: + walk(n.Lhs, m, "expr"); + walk(n.Rhs, m, "expr"); + case *ast.GoStmt: + walk(&n.Call, m, "expr"); + case *ast.DeferStmt: + walk(&n.Call, m, "expr"); + case *ast.ReturnStmt: + walk(n.Results, m, "expr"); + case *ast.BranchStmt: + case *ast.BlockStmt: + walk(n.List, m, "stmt"); + case *ast.IfStmt: + walk(n.Init, m, "stmt"); + walk(&n.Cond, m, "expr"); + walk(n.Body, m, "stmt"); + walk(n.Else, m, "stmt"); + case *ast.CaseClause: + walk(n.Values, m, "expr"); + walk(n.Body, m, "stmt"); + case *ast.SwitchStmt: + walk(n.Init, m, "stmt"); + walk(&n.Tag, m, "expr"); + walk(n.Body, m, "stmt"); + case *ast.TypeCaseClause: + walk(n.Types, m, "type"); + walk(n.Body, m, "stmt"); + case *ast.TypeSwitchStmt: + walk(n.Init, m, "stmt"); + walk(n.Assign, m, "stmt"); + walk(n.Body, m, "stmt"); + case *ast.CommClause: + walk(n.Lhs, m, "expr"); + walk(n.Rhs, m, "expr"); + walk(n.Body, m, "stmt"); + case *ast.SelectStmt: + walk(n.Body, m, "stmt"); + case *ast.ForStmt: + walk(n.Init, m, "stmt"); + walk(&n.Cond, m, "expr"); + walk(n.Post, m, "stmt"); + walk(n.Body, m, "stmt"); + case *ast.RangeStmt: + walk(&n.Key, m, "expr"); + walk(&n.Value, m, "expr"); + walk(&n.X, m, "expr"); + walk(n.Body, m, "stmt"); + + case *ast.ImportSpec: + case *ast.ValueSpec: + walk(&n.Type, m, "type"); + walk(n.Values, m, "expr"); + case *ast.TypeSpec: + walk(&n.Type, m, "type"); + + case *ast.BadDecl: + case *ast.GenDecl: + walk(n.Specs, m, "spec"); + case *ast.FuncDecl: + if n.Recv != nil { + walk(n.Recv, m, "field"); + } + walk(n.Type, m, "type"); + walk(n.Body, m, "stmt"); + + case *ast.File: + walk(n.Decls, m, "decl"); + + case *ast.Package: + for _, f := range n.Files { + walk(f, m, "file"); + } + + case []ast.Decl: + for _, d := range n { + walk(d, m, context); + } + case []ast.Expr: + for i := range n { + walk(&n[i], m, context); + } + case []*ast.Field: + for _, f := range n { + walk(f, m, context); + } + case []ast.Stmt: + for _, s := range n { + walk(s, m, context); + } + case []ast.Spec: + for _, s := range n { + walk(s, m, context); + } + } +} + +func fatal(err os.Error) { + // If err is a scanner.ErrorList, its String will print just + // the first error and then (+n more errors). + // Instead, turn it into a new Error that will return + // details for all the errors. + if list, ok := err.(scanner.ErrorList); ok { + for _, e := range list { + fmt.Fprintln(os.Stderr, e); + } + } else { + fmt.Fprintln(os.Stderr, err); + } + os.Exit(2); +} + +var nerrors int + +func error(pos token.Position, msg string, args ...) { + nerrors++; + if pos.IsValid() { + fmt.Fprintf(os.Stderr, "%s: ", pos); + } + fmt.Fprintf(os.Stderr, msg, args); + fmt.Fprintf(os.Stderr, "\n"); +} diff --git a/src/cmd/cgo/gmp.go b/src/cmd/cgo/gmp.go new file mode 100644 index 0000000000..82e4b98a15 --- /dev/null +++ b/src/cmd/cgo/gmp.go @@ -0,0 +1,427 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +An example of wrapping a C library in Go. This is the GNU +multiprecision library gmp's integer type mpz_t wrapped to look like +the Go package big's integer type Int. + +This is a syntactically valid Go program—it can be parsed with the Go +parser and processed by godoc—but it is not compiled directly by 6g. +Instead, a separate tool, cgo, processes it to produce three output +files. The first two, 6g.go and 6c.c, are a Go source file for 6g and +a C source file for 6c; both compile as part of the named package +(gmp, in this example). The third, gcc.c, is a C source file for gcc; +it compiles into a shared object (.so) that is dynamically linked into +any 6.out that imports the first two files. + +The stanza + + // #include + import "C" + +is a signal to cgo. The doc comment on the import of "C" provides +additional context for the C file. Here it is just a single #include +but it could contain arbitrary C definitions to be imported and used. + +Cgo recognizes any use of a qualified identifier C.xxx and uses gcc to +find the definition of xxx. If xxx is a type, cgo replaces C.xxx with +a Go translation. C arithmetic types translate to precisely-sized Go +arithmetic types. A C struct translates to a Go struct, field by +field; unrepresentable fields are replaced with opaque byte arrays. A +C union translates into a struct containing the first union member and +perhaps additional padding. C arrays become Go arrays. C pointers +become Go pointers. C function pointers and void pointers become Go's +*byte. + +For example, mpz_t is defined in as: + + typedef unsigned long int mp_limb_t; + + typedef struct + { + int _mp_alloc; + int _mp_size; + mp_limb_t *_mp_d; + } __mpz_struct; + + typedef __mpz_struct mpz_t[1]; + +Cgo generates: + + type _C_int int32 + type _C_mp_limb_t uint64 + type _C___mpz_struct struct { + _mp_alloc _C_int; + _mp_size _C_int; + _mp_d *_C_mp_limb_t; + } + type _C_mpz_t [1]_C___mpz_struct + +and then replaces each occurrence of a type C.xxx with _C_xxx. + +If xxx is data, cgo arranges for C.xxx to refer to the C variable, +with the type translated as described above. To do this, cgo must +introduce a Go variable that points at the C variable (the linker can +be told to initialize this pointer). For example, if the gmp library +provided + + mpz_t zero; + +then cgo would rewrite a reference to C.zero by introducing + + var _C_zero *C.mpz_t + +and then replacing all instances of C.zero with (*_C_zero). + +Cgo's most interesting translation is for functions. If xxx is a C +function, then cgo rewrites C.xxx into a new function _C_xxx that +calls the C xxx in a standard pthread. The new function translates +its arguments, calls xxx, and translates the return value. + +Translation of parameters and the return value follows the type +translation above with one extension: a function expecting a char* +will change to expect a string, and a function returning a char* will +change to return a string. The wrapper that cgo generates for the +first case allocates a new C string, passes that pointer to the C +function, and then frees the string when the function returns. The +wrapper for the second case assumes the char* being returned is +pointer that must be freed. It makes a Go string with a copy of the +contents and then frees the pointer. The char* conventions are a +useful heuristic; there should be some way to override them but isn't +yet. One can also imagine wrapping Go functions being passed into C +functions so that C can call them. + +Garbage collection is the big problem. It is fine for the Go world to +have pointers into the C world and to free those pointers when they +are no longer needed. To help, the garbage collector calls an +object's destroy() method prior to collecting it. C pointers can be +wrapped by Go objects with appropriate destroy methods. + +It is much more difficult for the C world to have pointers into the Go +world, because the Go garbage collector is unaware of the memory +allocated by C. I think the most important consideration is not to +constrain future implementations, so the rule is basically that Go +code can hand a Go pointer to C code but must separately arrange for +Go to hang on to a reference to the pointer until C is done with it. + +Note: the sketches assume that the char* <-> string conversions described +above have been thrown away. Otherwise one can't pass nil as the first +argument to mpz_get_str. + +Sketch of 6c.c: + + // NOTE: Maybe cgo is smart enough to figure out that + // mpz_init's real C name is __gmpz_init and use that instead. + + // Tell dynamic linker to initialize _cgo_mpz_init in this file + // to point at the function of the same name in gcc.c. + #pragma dynld _cgo_mpz_init _cgo_mpz_init "gmp.so" + #pragma dynld _cgo_mpz_get_str _cgo_mpz_get_str "gmp.so" + + void (*_cgo_mpz_init)(void*); + void (*_cgo_mpz_get_str)(void*); + + // implementation of Go function called as C.mpz_init below. + void + gmp·_C_mpz_init(struct { char x[8]; } p) // dummy struct, same size as 6g parameter frame + { + cgocall(_cgo_mpz_init, &p); + } + + void + gmp·_C_mpz_get_str(struct { char x[32]; } p) + { + cgocall(_cgo_mpz_get_str, &p); + } + +Sketch of 6g.go: + + // Type declarations from above, omitted. + + // Extern declarations for 6c.c functions + func _C_mpz_init(*_C_mpz_t) + func _C_mpz_get_str(*_C_char, int32, *_C_mpz_t) *_C_char + + // Original Go source with C.xxx replaced by _C_xxx + // as described above. + +Sketch of gcc.c: + + void + _cgo_mpz_init(void *v) + { + struct { + __mpz_struct *p1; // not mpz_t because of C array passing rule + } *a = v; + mpz_init(a->p1); + } + + void + _cgo_mpz_get_str(void *v) + { + struct { + char *p1; + int32 p2; + in32 _pad1; + __mpz_struct *p3; + char *p4; + } *a = v; + a->p4 = mpz_get_str(a->p1, a->p2, a->p3); + } + +Gmp defines mpz_t as __mpz_struct[1], meaning that if you +declare one it takes up a struct worth of space, but when you +pass one to a function, it passes a pointer to the space instead +of copying it. This can't be modeled directly in Go or in C structs +so some rewriting happens in the generated files. In Go, +the functions take *_C_mpz_t instead of _C_mpz_t, and in the +GCC structs, the parameters are __mpz_struct* instead of mpz_t. + +*/ + +package gmp + +// #include +import "C" + + +/* + * one of a kind + */ + +// An Int represents a signed multi-precision integer. +// The zero value for an Int represents the value 0. +type Int struct { + i C.mpz_t; + init bool; +} + +// NewInt returns a new Int initialized to x. +func NewInt(x int64) *Int { + z := new(Int); + z.init = true; + C.mpz_init(&z.i); + C.mpz_set(&z.i, x); + return z; +} + +// Int promises that the zero value is a 0, but in gmp +// the zero value is a crash. To bridge the gap, the +// init bool says whether this is a valid gmp value. +// doinit initializes z.i if it needs it. This is not inherent +// to FFI, just a mismatch between Go's convention of +// making zero values useful and gmp's decision not to. +func (z *Int) doinit() { + if z.init { + return; + } + z.init = true; + C.mpz_init(&z.i); +} + +// Bytes returns z's representation as a big-endian byte array. +func (z *Int) Bytes() []byte { + b := make([]byte, (z.Len() + 7) / 8); + n := C.size_t(len(b)); + C.mpz_export(&b[0], &n, 1, 1, 1, 0, &z.i); + return b[0:n]; +} + +// Len returns the length of z in bits. 0 is considered to have length 1. +func (z *Int) Len() int { + z.doinit(); + return int(C.mpz_sizeinbase(&z.i, 2)); +} + +// Set sets z = x and returns z. +func (z *Int) Set(x *Int) *Int { + z.doinit(); + C.mpz_set(&z.i, x); + return z; +} + +// SetBytes interprets b as the bytes of a big-endian integer +// and sets z to that value. +func (z *Int) SetBytes(b []byte) *Int { + z.doinit(); + if len(b) == 0 { + z.SetInt64(0); + } else { + C.mpz_import(&z.i, len(b), 1, 1, 1, 0, &b[0]); + } + return z; +} + +// SetInt64 sets z = x and returns z. +func (z *Int) SetInt64(x int64) *Int { + z.doinit(); + // TODO(rsc): more work on 32-bit platforms + C.mpz_set_si(z, x); + return z; +} + +// SetString interprets s as a number in the given base +// and sets z to that value. The base must be in the range [2,36]. +// SetString returns an error if s cannot be parsed or the base is invalid. +func (z *Int) SetString(s string, base int) os.Error { + z.doinit(); + if base < 2 || base > 36 { + return os.EINVAL; + } + if C.mpz_set_str(&z.i, s, base) < 0 { + return os.EINVAL; + } + return z; +} + +// String returns the decimal representation of z. +func (z *Int) String() string { + z.doinit(); + return C.mpz_get_str(nil, 10, &z.i); +} + +func (z *Int) destroy() { + if z.init { + C.mpz_clear(z); + } + z.init = false; +} + + +/* + * arithmetic + */ + +// Add sets z = x + y and returns z. +func (z *Int) Add(x, y *Int) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_add(&z.i, &x.i, &y.i); + return z; +} + +// Sub sets z = x - y and returns z. +func (z *Int) Sub(x, y *Int) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_sub(&z.i, &x.i, &y.i); + return z; +} + +// Mul sets z = x * y and returns z. +func (z *Int) Mul(x, y *Int) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_mul(&z.i, &x.i, &y.i); + return z; +} + +// Div sets z = x / y, rounding toward zero, and returns z. +func (z *Int) Div(x, y *Int) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_tdiv_q(&z.i, &x.i, &y.i); + return z; +} + +// Mod sets z = x % y and returns z. +// XXX Unlike in Go, the result is always positive. +func (z *Int) Mod(x, y *Int) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_tdiv_r(&z.i, &x.i, &y.i); + return z; +} + +// Lsh sets z = x << s and returns z. +func (z *Int) Lsh(x *Int, s uint) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_mul_2exp(&z.i, &x.i, s); +} + +// Rsh sets z = x >> s and returns z. +func (z *Int) Rsh(x *int, s uint) *Int { + x.doinit(); + y.doinit(); + z.doinit(); + C.mpz_div_2exp(&z.i, &x.i, s); +} + +// Exp sets z = x^y % m and returns z. +// If m == nil, Exp sets z = x^y. +func (z *Int) Exp(x, y, m *Int) *Int { + m.doinit(); + x.doinit(); + y.doinit(); + z.doinit(); + if m == nil { + C.mpz_pow(&z.i, &x.i, &y.i); + } else { + C.mpz_powm(&z.i, &x.i, &y.i, &m.i); + } + return z; +} + +// Neg sets z = -x and returns z. +func (z *Int) Neg(x *Int) *Int { + x.doinit(); + z.doinit(); + C.mpz_neg(&z.i, &x.i); + return z; +} + +// Abs sets z to the absolute value of x and returns z. +func (z *Int) Abs(x *Int) *Int { + x.doinit(); + z.doinit(); + C.mpz_abs(&z.i, &x.i); + return z; +} + + +/* + * functions without a clear receiver + */ + +// CmpInt compares x and y. The result is +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func CmpInt(x, y *Int) int { + x.doinit(); + y.doinit(); + return C.mpz_cmp(&x.i, &y.i); +} + +// DivModInt sets q = x / y and r = x % y. +func DivModInt(q, r, x, y *Int) { + q.doinit(); + r.doinit(); + x.doinit(); + y.doinit(); + C.mpz_tdiv_qr(&q.i, &r.i, &x.i, &y.i); +} + +// GcdInt sets d to the greatest common divisor of a and b, +// which must be positive numbers. +// If x and y are not nil, GcdInt sets x and y such that d = a*x + b*y. +// If either a or b is not positive, GcdInt sets d = x = y = 0. +func GcdInt(d, x, y, a, b *Int) { + d.doinit(); + x.doinit(); + y.doinit(); + a.doinit(); + b.doinit(); + C.mpz_gcdext(&d.i, &x.i, &y.i, &a.i, &b.i); +}