diff --git a/AGENTS.md b/AGENTS.md index 93bf8d64..711bc96a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -64,6 +64,7 @@ Implement in sequential passes. Don't write code until you've completed the veri **Pass 2: Check scope.** Does this need to exist? Check if it already exists in the API. Is this the library's job or the user's job? The library handles protocol correctness; application concerns (reconnection, auth, routing) belong in user code. **Pass 3: Check invariants.** Walk through Key Invariants before writing code: + - Reads: Will something still read from the connection? - Pools: Will pooled objects be returned on all paths? - Locks: Are you using context-aware `mu`, not `sync.Mutex`? @@ -118,6 +119,7 @@ Never use emdash. Use commas, semicolons, or separate sentences. **Naming matters.** Before proposing a name, stop and review existing names in the file. Ask: what would someone assume from this name? Does it fit with how similar things are named? A good name is accurate on its own and consistent in context. **Comment content:** + - Add information beyond what the code shows (not tautologies) - State directly: "Returns X" (not "Note that this returns X") - Drop filler: "basically", "actually", "really" add nothing diff --git a/go.mod b/go.mod index d32fbd77..54c4c7d6 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/coder/websocket go 1.23 + +require golang.org/x/sys v0.13.0 diff --git a/go.sum b/go.sum index e69de29b..5ba125a7 100644 --- a/go.sum +++ b/go.sum @@ -0,0 +1,2 @@ +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= \ No newline at end of file diff --git a/internal/examples/go.mod b/internal/examples/go.mod index e368b76b..b0156f2f 100644 --- a/internal/examples/go.mod +++ b/internal/examples/go.mod @@ -8,3 +8,5 @@ require ( github.com/coder/websocket v0.0.0-00010101000000-000000000000 golang.org/x/time v0.7.0 ) + +require golang.org/x/sys v0.13.0 // indirect diff --git a/internal/examples/go.sum b/internal/examples/go.sum index 60aa8f9a..0b9e20a4 100644 --- a/internal/examples/go.sum +++ b/internal/examples/go.sum @@ -1,2 +1,4 @@ golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= \ No newline at end of file diff --git a/mask_amd64.s b/mask_amd64.s index bd42be31..7d7da751 100644 --- a/mask_amd64.s +++ b/mask_amd64.s @@ -26,6 +26,11 @@ TEXT ·maskAsm(SB), NOSPLIT, $0-28 TESTQ $31, AX JNZ unaligned +aligned: + CMPB ·useAVX2(SB), $1 + JE avx2 + JMP sse + unaligned_loop_1byte: XORB SI, (AX) INCQ AX @@ -42,7 +47,7 @@ unaligned_loop_1byte: ORQ DX, DI TESTQ $31, AX - JZ sse + JZ aligned unaligned: TESTQ $7, AX // AND $7 & len, if not zero jump to loop_1b. @@ -55,7 +60,28 @@ unaligned_loop: SUBQ $8, CX TESTQ $31, AX JNZ unaligned_loop - JMP sse + JMP aligned + +avx2: + CMPQ CX, $0x80 + JL sse + VMOVQ DI, X0 + VPBROADCASTQ X0, Y0 + +avx2_loop: + VPXOR (AX), Y0, Y1 + VPXOR 32(AX), Y0, Y2 + VPXOR 64(AX), Y0, Y3 + VPXOR 96(AX), Y0, Y4 + VMOVDQU Y1, (AX) + VMOVDQU Y2, 32(AX) + VMOVDQU Y3, 64(AX) + VMOVDQU Y4, 96(AX) + ADDQ $0x80, AX + SUBQ $0x80, CX + CMPQ CX, $0x80 + JAE avx2_loop // loop if CX >= 0x80 + VZEROUPPER sse: CMPQ CX, $0x40 diff --git a/mask_asm.go b/mask_asm.go index f9484b5b..65b993d3 100644 --- a/mask_asm.go +++ b/mask_asm.go @@ -2,6 +2,8 @@ package websocket +import "golang.org/x/sys/cpu" + func mask(b []byte, key uint32) uint32 { // TODO: Will enable in v1.9.0. return maskGo(b, key) @@ -21,6 +23,9 @@ func mask(b []byte, key uint32) uint32 { // The AVX2 code I had to disable anyway as it wasn't performing as expected. // See https://github.com/nhooyr/websocket/pull/326#issuecomment-1771138049 // + +var useAVX2 = cpu.X86.HasAVX2 //lint:ignore U1000 mask_amd64.s + //go:noescape //lint:ignore U1000 disabled till v1.9.0 func maskAsm(b *byte, len int, key uint32) uint32