From 3cfe836f7bfabdb0046f2ac4a1e47da31e6d1879 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 5 Apr 2022 08:05:37 -0700 Subject: [PATCH] s2: Simplify asm code (#541) * s2: Simplify asm code --- s2/_generate/gen.go | 17 +- s2/encodeblock_amd64.go | 2 + s2/encodeblock_amd64.s | 350 ++-------------------------------------- 3 files changed, 20 insertions(+), 349 deletions(-) diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index 9ea8eeded0..8d07364407 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -36,6 +36,15 @@ func main() { Constraint(buildtags.Term("gc").ToConstraint()) Constraint(buildtags.Not("noasm").ToConstraint()) + // We need a function to add comments. + TEXT("_dummy_", 0, "func()") + Comment("#ifdef GOAMD64_v4") + Comment("#ifndef GOAMD64_v3") + Comment("#define GOAMD64_v3") + Comment("#endif") + Comment("#endif") + RET() + o := options{ bmi1: false, bmi2: false, @@ -2507,14 +2516,6 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re // 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c // 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c TZCNTQ(tmp, tmp) - Comment("#define TZCNTQ_EMITTED 1") - Comment("#endif\n") - Comment("#ifdef GOAMD64_v4") - TZCNTQ(tmp, tmp) - Comment("#define TZCNTQ_EMITTED 1") - Comment("#endif\n") - Comment("#ifdef TZCNTQ_EMITTED") - Comment("#undef TZCNTQ_EMITTED") Comment("#else") BSFQ(tmp, tmp) Comment("#endif") diff --git a/s2/encodeblock_amd64.go b/s2/encodeblock_amd64.go index d9312e5b92..88f27c0990 100644 --- a/s2/encodeblock_amd64.go +++ b/s2/encodeblock_amd64.go @@ -5,6 +5,8 @@ package s2 +func _dummy_() + // encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. diff --git a/s2/encodeblock_amd64.s b/s2/encodeblock_amd64.s index 729dbf536c..b5a057d972 100644 --- a/s2/encodeblock_amd64.s +++ b/s2/encodeblock_amd64.s @@ -5,6 +5,15 @@ #include "textflag.h" +// func _dummy_() +TEXT ·_dummy_(SB), $0 +#ifdef GOAMD64_v4 +#ifndef GOAMD64_v3 +#define GOAMD64_v3 +#endif +#endif + RET + // func encodeBlockAsm(dst []byte, src []byte) int // Requires: BMI, SSE2 TEXT ·encodeBlockAsm(SB), $65560-56 @@ -253,17 +262,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -791,17 +789,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -1466,17 +1453,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -1963,17 +1939,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -2597,17 +2562,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -2979,17 +2933,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -3510,17 +3453,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -3892,17 +3824,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -4423,17 +4344,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -4795,17 +4705,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -5225,17 +5124,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -6214,17 +6102,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -7138,17 +7015,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -7916,17 +7782,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -8694,17 +8549,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -9584,17 +9428,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -9918,17 +9751,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -10448,17 +10270,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -10739,17 +10550,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -11226,17 +11026,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -11517,17 +11306,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -12004,17 +11782,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -12295,17 +12062,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -12782,17 +12538,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R10, R10 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -13071,17 +12816,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R9, R9 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -13461,17 +13195,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -14068,17 +13791,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -14604,17 +14316,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -15140,17 +14841,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -15676,17 +15366,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ R11, R11 - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -16644,17 +16323,6 @@ matchlen_loopback_standalone: #ifdef GOAMD64_v3 TZCNTQ BX, BX -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef GOAMD64_v4 - TZCNTQ BX, BX - -#define TZCNTQ_EMITTED 1 -#endif - -#ifdef TZCNTQ_EMITTED -#undef TZCNTQ_EMITTED #else BSFQ BX, BX