]> Cypherpunks repositories - bass.git/commitdiff
Deterministic pax archiver
authorSergey Matveev <stargrave@stargrave.org>
Thu, 27 Jun 2024 16:44:18 +0000 (19:44 +0300)
committerSergey Matveev <stargrave@stargrave.org>
Thu, 27 Jun 2024 17:03:05 +0000 (20:03 +0300)
build/contrib/detpax/.gitignore [new file with mode: 0644]
build/contrib/detpax/README [new file with mode: 0644]
build/contrib/detpax/basic.t [new file with mode: 0755]
build/contrib/detpax/build [new file with mode: 0755]
build/contrib/detpax/go.mod [new file with mode: 0644]
build/contrib/detpax/main.go [new file with mode: 0644]
build/lib/mk-pkg
build/lib/zip-hash/go.mod
doc/build/skelpkg.texi

diff --git a/build/contrib/detpax/.gitignore b/build/contrib/detpax/.gitignore
new file mode 100644 (file)
index 0000000..f65c310
--- /dev/null
@@ -0,0 +1 @@
+/detpax
diff --git a/build/contrib/detpax/README b/build/contrib/detpax/README
new file mode 100644 (file)
index 0000000..00e3680
--- /dev/null
@@ -0,0 +1,21 @@
+detpax -- deterministic pax-format archiver
+
+Reproducible package building expects packages to be bit-for-bit
+identical on each rebuild. An ordinary tar invocation stores file
+mtimes and UIDs/GIDs, which ruins reproducible builds. Even the
+filesystem may list files in directories in a different order.
+
+https://reproducible-builds.org/docs/archives/ shows various ways of
+creating reproducible ustar archives. Unfortunately not everything
+(like big files) can be saved in ustar format. Forcing the pax format
+makes bsdtar include precise timestamps, and it has no option to
+disable that behaviour.
+
+detpax stores only a minimal set of metainformation. Directories are
+walked in sorted order. No UIDs/GIDs or timestamps of any kind are
+stored, only permissions including the sticky/setuid/setgid bits.
+
+It can also give some directories a higher precedence in the ordering.
+For example, BASS packages store their dependencies as hook scripts in
+the skelpkg subdirectory. With "-prec skelpkg" those are saved in the
+archive first, so they can be found more quickly.
diff --git a/build/contrib/detpax/basic.t b/build/contrib/detpax/basic.t
new file mode 100755 (executable)
index 0000000..337e465
--- /dev/null
@@ -0,0 +1,106 @@
+#!/bin/sh
+
+testname=`basename "$0"`
+test_description="Basic tests"
+. $SHARNESS_TEST_SRCDIR/sharness.sh
+
+mkdir prefix
+cd prefix
+
+# list reads a tar archive on stdin and prints one normalized line per
+# entry of its verbose listing: the first field (the mode string),
+# followed by everything starting at the first later field that begins
+# with "prefix". If no such field is found, only the last field is
+# printed after the mode. The fields in between are dropped.
+list() {
+    tar tvf - | perl -ane '
+        print "$F[0] ";
+        my $i=1;
+        for (; $i < $#F; $i++) {
+            last if (substr($F[$i], 0, 6) eq "prefix");
+        };
+        print join " ", @F[$i..$#F];
+        print "\n";
+    '
+}
+
+mkdir dir
+echo hello >dir/hw
+mkdir dir/subdir
+ln dir/hw dir/subdir/hw-linked
+ln -s unexistent dir/symlink
+
+mkdir prec0
+touch prec0/file
+ln dir/hw prec0/hw-linked
+
+mkdir prec0/prec1
+touch prec0/prec1/file
+ln dir/hw prec0/hw-linked
+
+cd ..
+echo world >just-a-file
+chmod -R go-rwx prefix just-a-file
+chmod g+rx prefix
+chmod +t prefix/dir
+chmod o+w prefix/prec0
+chmod o+r prefix/dir/hw
+
+detpax prefix/ just-a-file | list >their
+cat >our <<EOF
+drwxr-x--- prefix
+drwx-----T prefix/dir
+drwx------ prefix/dir/subdir
+drwx----w- prefix/prec0
+drwx------ prefix/prec0/prec1
+-rw----r-- prefix/dir/hw
+lrwx------ prefix/dir/symlink -> unexistent
+-rw------- prefix/prec0/file
+-rw------- prefix/prec0/prec1/file
+hrw----r-- prefix/dir/subdir/hw-linked link to prefix/dir/hw
+hrw----r-- prefix/prec0/hw-linked link to prefix/dir/hw
+-rw------- just-a-file
+EOF
+test_expect_success "basic" "test_cmp our their"
+
+detpax -prec prec0 prefix/ just-a-file | list >their
+cat >our <<EOF
+drwxr-x--- prefix
+drwx-----T prefix/dir
+drwx------ prefix/dir/subdir
+drwx----w- prefix/prec0
+drwx------ prefix/prec0/prec1
+-rw----r-- prefix/dir/hw
+-rw------- prefix/prec0/file
+hrw----r-- prefix/prec0/hw-linked link to prefix/dir/hw
+-rw------- prefix/prec0/prec1/file
+lrwx------ prefix/dir/symlink -> unexistent
+hrw----r-- prefix/dir/subdir/hw-linked link to prefix/dir/hw
+-rw------- just-a-file
+EOF
+test_expect_success "prec0" "test_cmp our their"
+
+detpax -prec prec0/prec1 -prec prec0 prefix/ just-a-file |
+    tee arch.tar | list >their
+cat >our <<EOF
+drwxr-x--- prefix
+drwx-----T prefix/dir
+drwx------ prefix/dir/subdir
+drwx----w- prefix/prec0
+drwx------ prefix/prec0/prec1
+-rw------- prefix/prec0/prec1/file
+-rw----r-- prefix/dir/hw
+-rw------- prefix/prec0/file
+hrw----r-- prefix/prec0/hw-linked link to prefix/dir/hw
+lrwx------ prefix/dir/symlink -> unexistent
+hrw----r-- prefix/dir/subdir/hw-linked link to prefix/dir/hw
+-rw------- just-a-file
+EOF
+test_expect_success "prec1" "test_cmp our their"
+
+mkdir tmp
+cd tmp
+test_expect_success "unpack" "tar xf ../arch.tar"
+
+echo hello >our
+test_expect_success "hw.txt" "test_cmp our prefix/dir/hw"
+
+echo world >our
+test_expect_success "just-a-file" "test_cmp our just-a-file"
+
+test_done
diff --git a/build/contrib/detpax/build b/build/contrib/detpax/build
new file mode 100755 (executable)
index 0000000..342cc03
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+exec go build -ldflags=-s -o detpax
diff --git a/build/contrib/detpax/go.mod b/build/contrib/detpax/go.mod
new file mode 100644 (file)
index 0000000..872f6cd
--- /dev/null
@@ -0,0 +1,3 @@
+module go.cypherpunks.ru/bass/detpax
+
+go 1.22.4
diff --git a/build/contrib/detpax/main.go b/build/contrib/detpax/main.go
new file mode 100644 (file)
index 0000000..03ff1b5
--- /dev/null
@@ -0,0 +1,228 @@
+// Copyright (C) 2024 Sergey Matveev <stargrave@stargrave.org>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 3 of the License.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package main
+
+import (
+       "archive/tar"
+       "bufio"
+       "flag"
+       "fmt"
+       "io"
+       "io/fs"
+       "log"
+       "os"
+       "sort"
+       "strings"
+       "syscall"
+)
+
+// Sizes and sort-order values used by the archiver.
+const (
+       SkelpkgPrefix = "skelpkg" // not referenced in the code visible here
+       DefBufSize    = 1 << 20   // size of the copy buffer and of the stdout buffer
+       // OrderForLinks is the default order in walk; it stays on
+       // hardlinks found outside of precedence directories.
+       OrderForLinks = 1<<31 - 1
+       // OrderForOther is given to symlinks and to first-seen regular
+       // files outside of precedence directories; it sorts just before
+       // OrderForLinks.
+       OrderForOther = OrderForLinks - 1
+)
+
+// Info is a single entry that is going to be written to the archive.
+type Info struct {
+       name  string      // path of the entry, used as the tar header name
+       link  string      // symlink target, or the first seen name of a hardlinked inode
+       size  int64       // set only for regular files stored with their data
+       mode  fs.FileMode // file mode as reported by stat
+       order int         // primary sorting key, entries with smaller values go first
+}
+
+// Inode is the (device, inode number) pair used as a map key in walk
+// to detect hardlinks to an already seen file.
+type Inode struct{ dev, ino uint64 }
+
+func walk(
+       root string,
+       precs []string,
+       files map[string]*Info,
+       inodes map[Inode]*Info,
+) {
+       fd, err := os.Open(root)
+       if err != nil {
+               log.Fatalln("can not open:", root, err)
+       }
+       defer fd.Close()
+       fi, err := fd.Stat()
+       if err != nil {
+               log.Fatalln("can not stat:", root, err)
+       }
+       files[root] = &Info{name: root, mode: fi.Mode()}
+       fis, err := fd.Readdir(0)
+       if err != nil {
+               log.Fatalln("can not readdir:", root, err)
+       }
+       sort.Slice(fis, func(i, j int) bool {
+               return fis[i].Name() < fis[j].Name()
+       })
+       defOrder := OrderForLinks
+       for i, prec := range precs {
+               if strings.HasPrefix(root, prec) {
+                       defOrder = 1 + 1 + 2*i
+                       break
+               }
+       }
+       for _, fi := range fis {
+               if fi.IsDir() {
+                       walk(root+"/"+fi.Name(), precs, files, inodes)
+                       continue
+               }
+               info := Info{name: root + "/" + fi.Name(), mode: fi.Mode()}
+               if (info.mode & fs.ModeSymlink) > 0 {
+                       info.link, err = os.Readlink(info.name)
+                       if err != nil {
+                               log.Fatalln("can not Readlink:", info.name, err)
+                       }
+                       info.order = OrderForOther
+                       files[info.name] = &info
+                       continue
+               }
+               if !info.mode.IsRegular() {
+                       continue
+               }
+               s := fi.Sys().(*syscall.Stat_t)
+               inode := Inode{s.Dev, s.Ino}
+               link := inodes[inode]
+               info.order = defOrder
+               if link == nil {
+                       inodes[inode] = &info
+                       info.size = fi.Size()
+                       if info.order == OrderForLinks {
+                               info.order = OrderForOther
+                       }
+               } else {
+                       files[link.name].order = info.order - 1
+                       info.link = link.name
+               }
+               files[info.name] = &info
+       }
+}
+
+// main writes a pax archive of all FILE/DIR arguments to stdout.
+// Every argument is processed on its own: its entries are collected,
+// sorted by (order, name) and appended to the archive. Any error
+// terminates the program through log.Fatalln.
+func main() {
+       // -prec may be given several times, empty values are ignored.
+       var precsOrig []string
+       flag.Func("prec", "Add directory with higher precedence",
+               func(s string) error {
+                       if s != "" {
+                               precsOrig = append(precsOrig, s)
+                       }
+                       return nil
+               })
+       flag.Usage = func() {
+               fmt.Fprintln(flag.CommandLine.Output(),
+                       `Usage: detpax [-prec PREC0] [-prec PRECx] {FILE|DIR} [...]
+
+Precedence directories must not include DIR name.
+PREC0 has higher precedence than following ones.
+    -prec foo/bar -prec foo dir
+will put dir/foo/bar first, dir/foo second, others next.`)
+               flag.PrintDefaults()
+       }
+       flag.Parse()
+       var fd *os.File
+       // One copy buffer is shared by all files; stdout is buffered too.
+       buf := make([]byte, DefBufSize)
+       bw := bufio.NewWriterSize(os.Stdout, DefBufSize)
+       w := tar.NewWriter(bw)
+       var err error
+       for _, root := range flag.Args() {
+               root = strings.TrimSuffix(root, "/")
+               // Precedence directories are given relative to the root.
+               precs := make([]string, 0, len(precsOrig))
+               for _, prec := range precsOrig {
+                       precs = append(precs, root+"/"+prec)
+               }
+               var files []*Info
+               {
+                       filesMap := map[string]*Info{}
+                       {
+                               // Lstat: a symlink given as an argument is not followed.
+                               fi, err := os.Lstat(root)
+                               if err != nil {
+                                       log.Fatalln("can not stat:", root, err)
+                               }
+                               if fi.IsDir() {
+                                       inodes := map[Inode]*Info{}
+                                       walk(root, precs, filesMap, inodes)
+                               } else if !fi.Mode().IsRegular() {
+                                       log.Fatalln("only directory or file expected:", root)
+                               } else {
+                                       filesMap[root] = &Info{
+                                               name: root,
+                                               mode: fi.Mode(),
+                                               size: fi.Size(),
+                                       }
+                               }
+                       }
+                       // Flatten the map to be able to sort its entries.
+                       files = make([]*Info, 0, len(filesMap))
+                       for _, info := range filesMap {
+                               files = append(files, info)
+                       }
+               }
+               // Smaller order goes first, ties are resolved by the name.
+               sort.Slice(files, func(i, j int) bool {
+                       if files[i].order == files[j].order {
+                               return files[i].name < files[j].name
+                       }
+                       return files[i].order < files[j].order
+               })
+
+               // A single header is reused for all entries of that root:
+               // the fields assigned below are overwritten on each iteration.
+               hdr := tar.Header{Format: tar.FormatPAX}
+               for _, info := range files {
+                       hdr.Name = info.name
+                       // Permission bits plus sticky/setgid/setuid, converted from
+                       // fs.FileMode flags to the traditional octal values.
+                       hdr.Mode = int64(info.mode & 0777)
+                       if info.mode&fs.ModeSticky > 0 {
+                               hdr.Mode |= 01000
+                       }
+                       if info.mode&fs.ModeSetgid > 0 {
+                               hdr.Mode |= 02000
+                       }
+                       if info.mode&fs.ModeSetuid > 0 {
+                               hdr.Mode |= 04000
+                       }
+                       hdr.Size = info.size
+                       if info.link == "" {
+                               hdr.Linkname = ""
+                       } else {
+                               hdr.Linkname = info.link
+                       }
+                       // A non-symlink entry with a link name is a hardlink.
+                       if info.mode&fs.ModeDir > 0 {
+                               hdr.Typeflag = tar.TypeDir
+                       } else if info.mode&fs.ModeSymlink > 0 {
+                               hdr.Typeflag = tar.TypeSymlink
+                       } else if info.link == "" {
+                               hdr.Typeflag = tar.TypeReg
+                       } else {
+                               hdr.Typeflag = tar.TypeLink
+                       }
+                       if err = w.WriteHeader(&hdr); err != nil {
+                               log.Fatalln("can not WriteHeader:", err)
+                       }
+                       // Only regular files are followed by their data.
+                       if hdr.Typeflag != tar.TypeReg {
+                               continue
+                       }
+                       fd, err = os.Open(info.name)
+                       if err != nil {
+                               log.Fatalln(err)
+                       }
+                       if _, err = io.CopyBuffer(w, fd, buf); err != nil {
+                               log.Fatalln(err)
+                       }
+                       fd.Close()
+               }
+       }
+       // Finish the archive first, then flush the buffered stdout.
+       if err = w.Close(); err != nil {
+               log.Fatalln(err)
+       }
+       if err = bw.Flush(); err != nil {
+               log.Fatalln(err)
+       }
+}
index 343977e0d233a6906962d1e3a84ee09ed4d9df5374d7fa206ad67ce2306e6cb8..4a75290e91a6db34980e9f4447001d2ce2c52c7559cc5d5a6bb03957c005577a 100755 (executable)
 [ -n "$BASS_ROOT" ] || BASS_ROOT="$(dirname "$(realpath -- "$0")")"/../..
 sname="$0" . "$BASS_ROOT"/lib/rc
 
-BASS_BIRTHTIME="2024-02-12 14:08:37"
-
 namenhash="$1"
 [ -d "$namenhash" ]
 shift
 
+[ -x "$BASS_ROOT"/build/contrib/detpax/detpax ] ||
+    ( cd "$BASS_ROOT"/build/contrib/detpax ; ./build )
+
 [ -n "$BASS_NOSYNC" ] || find $namenhash -type f -or -type d -exec $FSYNC {} +
 
 tmp=$(mktemp -d $TMPDIR/$namenhash-mk-pkg.XXXXXX)
@@ -45,21 +46,9 @@ EOF
 meta4ra-create -no-published -no-generator -hashes "$META4RA_HASHES" \
     -fn buildinfo <$tmp/buildinfo >$tmp/buildinfo.meta4
 
-# https://reproducible-builds.org/docs/archives/
-# bsdtar tries to use ustar format by default, adding pax-headers only
-# if ustar is incapable of storing something (very long names, big files).
-# Explicit pax format usage leads to atime/ctime fields inclusion.
-# Forced mtime date is birthtime of the BASS.
 chmod -R a-w $namenhash
-find $namenhash -exec touch -h -d "$BASS_BIRTHTIME" {} +
-find $namenhash -print0 | LC_ALL=C sort -z >$tmp/tar-list
-{
-    perl -0 -F/ -lane 'print if $F[1] eq "skelpkg"' $tmp/tar-list
-    perl -0 -F/ -lane 'print unless $F[1] eq "skelpkg"' $tmp/tar-list
-} >$tmp/tar-list-sorted
-mv $tmp/tar-list-sorted $tmp/tar-list
 {
-    $TAR cfTn - $tmp/tar-list --null --uid=0 --gid=0 --numeric-owner ||
+    "$BASS_ROOT"/build/contrib/detpax/detpax -prec skelpkg $namenhash ||
     touch $tmp/tar-pipe-failed
 } |
 { $COMPRESSOR || touch $tmp/tar-pipe-failed ; } |
@@ -74,5 +63,4 @@ mv $namenhash bin
 mv $namenhash.meta4 bin.meta4
 totar="name name.meta4 buildinfo buildinfo.meta4 bin.meta4 bin"
 chmod a-w $totar
-touch -h -d "$BASS_BIRTHTIME" $totar
-$TAR cf - --uid=0 --gid=0 --numeric-owner $totar
+"$BASS_ROOT"/build/contrib/detpax/detpax $totar
index 4efe15230d1f5d285d6ba6bd80511513151e66076034d68eaef7934da4aedeed..6b6b9bcc0c7b8b4252900aac2340925f38331853b9a1fb594eadb0a78a51db07 100644 (file)
@@ -1,4 +1,4 @@
-module rnd.stcnet.ru/zip-hash
+module go.cypherpunks.ru/bass/zip-hash
 
 go 1.22.1
 
index 4ae9eede14067a7bc920f5a63e39e7d808ef94155c64e0a2de9b3ad2b9449ba1..1e51052dbb0044bcd86a2c6aecd0e6170c6d93819ebebebd11d1ad00bce9e33c 100644 (file)
@@ -36,14 +36,13 @@ single file, uncompressed POSIX pax archive with following entries:
 
 @cindex pax archive
 @cindex ustar archive
+@pindex detpax
 POSIX ustar archive format can not hold more than 8GiB of data and (very)
 long filenames. Forced pax usage guarantees compatibility with variety
 of OSes. GNU tar's format (also not having limitations above) easily
-could be unreadable on non-GNU systems. Unfortunately forced pax causes
-including of @command{atime}/@command{ctime} metainformation, that can
-not be omitted from @command{bsdtar} CLI. They prevent byte-to-byte
-deterministic archives creation. So we actually use ustar, that will be
-upgraded to pax if necessary (too long filenames or sizes).
+could be unreadable on non-GNU systems. BASS uses the
+@command{build/contrib/detpax} archiver to create pax archives in a
+deterministic, bit-for-bit reproducible way.
 
 As pax/tar does not have any kind of index, as ZIP does, it is crucial
 to place the largest @file{bin} file at the very end of the archive. And