From 3b0a7afb07ee9e7cf2730e505f4a43099764bba34c731867ef165294b5492371 Mon Sep 17 00:00:00 2001
From: Sergey Matveev
Date: Fri, 13 Dec 2024 21:04:07 +0300
Subject: [PATCH] Unroll loops

Replace the byte-by-byte loops of the big-endian helpers with a switch
holding a hard-coded sequence for each of the eight possible lengths.
The generic loop is kept as a fallback for any other length.  In C
that fallback no longer shifts a 64-bit value by 64 or more bits
(undefined behaviour): for lengths above eight only the eight trailing
bytes carry the value, which is what the Go fallback already yields.
---
 cyac/lib/frombe.c  |  58 ++++++++++++++++++++++++--
 cyac/lib/tobe.c    |  67 +++++++++++++++++++++++++++---
 gyac/atom/be/be.go | 101 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 213 insertions(+), 13 deletions(-)

diff --git a/cyac/lib/frombe.c b/cyac/lib/frombe.c
index 92f7c44..d194cb4 100644
--- a/cyac/lib/frombe.c
+++ b/cyac/lib/frombe.c
@@ -7,10 +7,60 @@ uint64_t
 yacFromBE(const unsigned char *buf, const size_t len)
 {
     uint64_t v = 0;
-    for (size_t i = 0; i < len; i++) {
-        v |= (uint64_t)(buf[i]) << ((len - i - 1) * 8);
+    switch (len) {
+    case 1:
+        v = (uint64_t)(buf[0]);
+        break;
+    case 2:
+        v = ((uint64_t)(buf[0]) << (uint64_t)8) | (uint64_t)(buf[1]);
+        break;
+    case 3:
+        v = ((uint64_t)(buf[0]) << (uint64_t)16) | ((uint64_t)(buf[1]) << (uint64_t)8) |
+            (uint64_t)(buf[2]);
+        break;
+    case 4:
+        v = ((uint64_t)(buf[0]) << (uint64_t)24) |
+            ((uint64_t)(buf[1]) << (uint64_t)16) | ((uint64_t)(buf[2]) << (uint64_t)8) |
+            (uint64_t)(buf[3]);
+        break;
+    case 5:
+        v = ((uint64_t)(buf[0]) << (uint64_t)32) |
+            ((uint64_t)(buf[1]) << (uint64_t)24) |
+            ((uint64_t)(buf[2]) << (uint64_t)16) | ((uint64_t)(buf[3]) << (uint64_t)8) |
+            (uint64_t)(buf[4]);
+        break;
+    case 6:
+        v = ((uint64_t)(buf[0]) << (uint64_t)40) |
+            ((uint64_t)(buf[1]) << (uint64_t)32) |
+            ((uint64_t)(buf[2]) << (uint64_t)24) |
+            ((uint64_t)(buf[3]) << (uint64_t)16) | ((uint64_t)(buf[4]) << (uint64_t)8) |
+            (uint64_t)(buf[5]);
+        break;
+    case 7:
+        v = ((uint64_t)(buf[0]) << (uint64_t)48) |
+            ((uint64_t)(buf[1]) << (uint64_t)40) |
+            ((uint64_t)(buf[2]) << (uint64_t)32) |
+            ((uint64_t)(buf[3]) << (uint64_t)24) |
+            ((uint64_t)(buf[4]) << (uint64_t)16) | ((uint64_t)(buf[5]) << (uint64_t)8) |
+            (uint64_t)(buf[6]);
+        break;
+    case 8:
+        v = ((uint64_t)(buf[0]) << (uint64_t)56) |
+            ((uint64_t)(buf[1]) << (uint64_t)48) |
+            ((uint64_t)(buf[2]) << (uint64_t)40) |
+            ((uint64_t)(buf[3]) << (uint64_t)32) |
+            ((uint64_t)(buf[4]) << (uint64_t)24) |
+            ((uint64_t)(buf[5]) << (uint64_t)16) | ((uint64_t)(buf[6]) << (uint64_t)8) |
+            (uint64_t)(buf[7]);
+        break;
+    default:
+        // Reached only for len == 0 (the result stays 0) and len > 8.
+        // Only the eight trailing bytes fit into uint64_t: starting at
+        // them keeps every shift count below 64, as shifting by the
+        // full width of the type or more is undefined behaviour in C.
+        for (size_t i = (len > 8) ? (len - 8) : 0; i < len; i++) {
+            v |= (uint64_t)(buf[i]) << ((len - i - 1) * 8);
+        }
     }
-    // this can be replaced by a switch with hard-coded decoding
-    // sequence without any loops for each of eight possible lengths
     return v;
 }
diff --git a/cyac/lib/tobe.c b/cyac/lib/tobe.c
index a66fb1c..9dacd33 100644
--- a/cyac/lib/tobe.c
+++ b/cyac/lib/tobe.c
@@ -6,10 +6,67 @@
 void
 yacToBE(unsigned char *buf, const size_t len, const uint64_t v)
 {
-    for (size_t i = 0; i < len; i++) {
-        buf[i] =
-            (unsigned char)(((v & ((uint64_t)0xFF << ((len - i - 1) * 8))) >> ((len - i - 1) * 8)) & (uint8_t)0xFF);
+    switch (len) {
+    case 1:
+        buf[0] = (v & (uint64_t)0x00000000000000FF);
+        break;
+    case 2:
+        buf[1] = (v & (uint64_t)0x00000000000000FF);
+        buf[0] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        break;
+    case 3:
+        buf[2] = (v & (uint64_t)0x00000000000000FF);
+        buf[1] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        buf[0] = (v & (uint64_t)0x0000000000FF0000) >> (uint8_t)16;
+        break;
+    case 4:
+        buf[3] = (v & (uint64_t)0x00000000000000FF);
+        buf[2] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        buf[1] = (v & (uint64_t)0x0000000000FF0000) >> (uint8_t)16;
+        buf[0] = (v & (uint64_t)0x00000000FF000000) >> (uint8_t)24;
+        break;
+    case 5:
+        buf[4] = (v & (uint64_t)0x00000000000000FF);
+        buf[3] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        buf[2] = (v & (uint64_t)0x0000000000FF0000) >> (uint8_t)16;
+        buf[1] = (v & (uint64_t)0x00000000FF000000) >> (uint8_t)24;
+        buf[0] = (v & (uint64_t)0x000000FF00000000) >> (uint8_t)32;
+        break;
+    case 6:
+        buf[5] = (v & (uint64_t)0x00000000000000FF);
+        buf[4] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        buf[3] = (v & (uint64_t)0x0000000000FF0000) >> (uint8_t)16;
+        buf[2] = (v & (uint64_t)0x00000000FF000000) >> (uint8_t)24;
+        buf[1] = (v & (uint64_t)0x000000FF00000000) >> (uint8_t)32;
+        buf[0] = (v & (uint64_t)0x0000FF0000000000) >> (uint8_t)40;
+        break;
+    case 7:
+        buf[6] = (v & (uint64_t)0x00000000000000FF);
+        buf[5] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        buf[4] = (v & (uint64_t)0x0000000000FF0000) >> (uint8_t)16;
+        buf[3] = (v & (uint64_t)0x00000000FF000000) >> (uint8_t)24;
+        buf[2] = (v & (uint64_t)0x000000FF00000000) >> (uint8_t)32;
+        buf[1] = (v & (uint64_t)0x0000FF0000000000) >> (uint8_t)40;
+        buf[0] = (v & (uint64_t)0x00FF000000000000) >> (uint8_t)48;
+        break;
+    case 8:
+        buf[7] = (v & (uint64_t)0x00000000000000FF);
+        buf[6] = (v & (uint64_t)0x000000000000FF00) >> (uint8_t)8;
+        buf[5] = (v & (uint64_t)0x0000000000FF0000) >> (uint8_t)16;
+        buf[4] = (v & (uint64_t)0x00000000FF000000) >> (uint8_t)24;
+        buf[3] = (v & (uint64_t)0x000000FF00000000) >> (uint8_t)32;
+        buf[2] = (v & (uint64_t)0x0000FF0000000000) >> (uint8_t)40;
+        buf[1] = (v & (uint64_t)0x00FF000000000000) >> (uint8_t)48;
+        buf[0] = (v & (uint64_t)0xFF00000000000000) >> (uint8_t)56;
+        break;
+    default:
+        // Reached only for len == 0 (nothing is written) and len > 8.
+        // Positions above the eight least significant bytes hold no
+        // bits of v, so they are written as zero instead of shifting
+        // by 64 or more bits, which is undefined behaviour in C.
+        for (size_t i = 0; i < len; i++) {
+            const size_t shift = (len - i - 1) * 8;
+            buf[i] = (shift < 64) ? (unsigned char)((v >> shift) & 0xFF) : 0;
+        }
     }
-    // this can be replaced by a switch with hard-coded decoding
-    // sequence without any loops for each of eight possible lengths
 }
diff --git a/gyac/atom/be/be.go b/gyac/atom/be/be.go
index f37d54e..3773c41 100644
--- a/gyac/atom/be/be.go
+++ b/gyac/atom/be/be.go
@@ -17,14 +17,107 @@
 package be
 
 func Get(buf []byte) (v uint64) {
-	for i := 0; i < len(buf); i++ {
-		v |= uint64(buf[i]) << ((len(buf) - i - 1) * 8)
+	switch len(buf) {
+	case 1:
+		v = uint64(buf[0])
+	case 2:
+		v = (uint64(buf[0]) << 8) | uint64(buf[1])
+	case 3:
+		v = (uint64(buf[0]) << 16) |
+			(uint64(buf[1]) << 8) |
+			uint64(buf[2])
+	case 4:
+		v = (uint64(buf[0]) << 24) |
+			(uint64(buf[1]) << 16) |
+			(uint64(buf[2]) << 8) |
+			uint64(buf[3])
+	case 5:
+		v = (uint64(buf[0]) << 32) |
+			(uint64(buf[1]) << 24) |
+			(uint64(buf[2]) << 16) |
+			(uint64(buf[3]) << 8) |
+			uint64(buf[4])
+	case 6:
+		v = (uint64(buf[0]) << 40) |
+			(uint64(buf[1]) << 32) |
+			(uint64(buf[2]) << 24) |
+			(uint64(buf[3]) << 16) |
+			(uint64(buf[4]) << 8) |
+			uint64(buf[5])
+	case 7:
+		v = (uint64(buf[0]) << 48) |
+			(uint64(buf[1]) << 40) |
+			(uint64(buf[2]) << 32) |
+			(uint64(buf[3]) << 24) |
+			(uint64(buf[4]) << 16) |
+			(uint64(buf[5]) << 8) |
+			uint64(buf[6])
+	case 8:
+		v = (uint64(buf[0]) << 56) |
+			(uint64(buf[1]) << 48) |
+			(uint64(buf[2]) << 40) |
+			(uint64(buf[3]) << 32) |
+			(uint64(buf[4]) << 24) |
+			(uint64(buf[5]) << 16) |
+			(uint64(buf[6]) << 8) |
+			uint64(buf[7])
+	default:
+		for i := 0; i < len(buf); i++ {
+			v |= uint64(buf[i]) << ((len(buf) - i - 1) * 8)
+		}
 	}
 	return
 }
 
 func Put(buf []byte, v uint64) {
-	for i := 0; i < len(buf); i++ {
-		buf[i] = byte((v & (0xFF << ((len(buf) - i - 1) * 8)) >> ((len(buf) - i - 1) * 8)) & 0xFF)
+	switch len(buf) {
+	case 1:
+		buf[0] = byte(v & 0x00000000000000FF)
+	case 2:
+		buf[1] = byte(v & 0x00000000000000FF)
+		buf[0] = byte((v & 0x000000000000FF00) >> 8)
+	case 3:
+		buf[2] = byte(v & 0x00000000000000FF)
+		buf[1] = byte((v & 0x000000000000FF00) >> 8)
+		buf[0] = byte((v & 0x0000000000FF0000) >> 16)
+	case 4:
+		buf[3] = byte(v & 0x00000000000000FF)
+		buf[2] = byte((v & 0x000000000000FF00) >> 8)
+		buf[1] = byte((v & 0x0000000000FF0000) >> 16)
+		buf[0] = byte((v & 0x00000000FF000000) >> 24)
+	case 5:
+		buf[4] = byte(v & 0x00000000000000FF)
+		buf[3] = byte((v & 0x000000000000FF00) >> 8)
+		buf[2] = byte((v & 0x0000000000FF0000) >> 16)
+		buf[1] = byte((v & 0x00000000FF000000) >> 24)
+		buf[0] = byte((v & 0x000000FF00000000) >> 32)
+	case 6:
+		buf[5] = byte(v & 0x00000000000000FF)
+		buf[4] = byte((v & 0x000000000000FF00) >> 8)
+		buf[3] = byte((v & 0x0000000000FF0000) >> 16)
+		buf[2] = byte((v & 0x00000000FF000000) >> 24)
+		buf[1] = byte((v & 0x000000FF00000000) >> 32)
+		buf[0] = byte((v & 0x0000FF0000000000) >> 40)
+	case 7:
+		buf[6] = byte(v & 0x00000000000000FF)
+		buf[5] = byte((v & 0x000000000000FF00) >> 8)
+		buf[4] = byte((v & 0x0000000000FF0000) >> 16)
+		buf[3] = byte((v & 0x00000000FF000000) >> 24)
+		buf[2] = byte((v & 0x000000FF00000000) >> 32)
+		buf[1] = byte((v & 0x0000FF0000000000) >> 40)
+		buf[0] = byte((v & 0x00FF000000000000) >> 48)
+	case 8:
+		buf[7] = byte(v & 0x00000000000000FF)
+		buf[6] = byte((v & 0x000000000000FF00) >> 8)
+		buf[5] = byte((v & 0x0000000000FF0000) >> 16)
+		buf[4] = byte((v & 0x00000000FF000000) >> 24)
+		buf[3] = byte((v & 0x000000FF00000000) >> 32)
+		buf[2] = byte((v & 0x0000FF0000000000) >> 40)
+		buf[1] = byte((v & 0x00FF000000000000) >> 48)
+		buf[0] = byte((v & 0xFF00000000000000) >> 56)
+	default:
+		for i := 0; i < len(buf); i++ {
+			buf[i] = byte((v & (0xFF << ((len(buf) - i - 1) * 8)) >> ((len(buf) - i - 1) * 8)) & 0xFF)
+		}
 	}
 }
-- 
2.48.1