[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]

Re: [XENVIF PATCH RESEND] Implement faster checksum for x64
Reviewed-by: Owen Smith <owen.smith@xxxxxxxxx>

________________________________________
From: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
Sent: 10 October 2025 10:09 AM
To: win-pv-devel@xxxxxxxxxxxxxxxxxxxx
Cc: Tu Dinh; Owen Smith
Subject: [XENVIF PATCH RESEND] Implement faster checksum for x64

Implement the Internet checksum using parallel summation based on
add-with-carry per RFC 1071, following the algorithm described here:

https://blogs.igalia.com/dpino/2018/06/14/fast-checksum-computation/

This implementation provides >8x speed improvement over the previous C
implementation:

            C             assembly
40          9.53867 ns    2.77659 ns
128         31.0441 ns    4.43259 ns
576         125.706 ns    14.332 ns
1500        320.611 ns    38.434 ns
2048        436.591 ns    53.45 ns
4096        869.137 ns    114.892 ns
8192        1.73352 us    239.596 ns
16384       3.46273 us    482.943 ns
32768       6.92263 us    974.495 ns
65535       13.8462 us    1.9464 us

At the same time, move the entire checksum code into a single function
instead of having a static inline implementation and a wrapper.

Signed-off-by: Tu Dinh Ngoc <dinhngoc.tu@xxxxxxx>
Signed-off-by: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
---
Resending a 4th time (!) due to previous email being lost.
--- src/xenvif/amd64/checksum_amd64.asm | 120 ++++++++++++++++++++++++++++ src/xenvif/checksum.c | 37 ++++----- vs2019/xenvif/xenvif.vcxproj | 5 ++ vs2022/xenvif/xenvif.vcxproj | 5 ++ 4 files changed, 145 insertions(+), 22 deletions(-) create mode 100644 src/xenvif/amd64/checksum_amd64.asm diff --git a/src/xenvif/amd64/checksum_amd64.asm b/src/xenvif/amd64/checksum_amd64.asm new file mode 100644 index 0000000..8fbc241 --- /dev/null +++ b/src/xenvif/amd64/checksum_amd64.asm @@ -0,0 +1,120 @@ +; SPDX-License-Identifier: MIT + +.code + +; VOID +; AccumulateChecksum( +; IN OUT PULONG Accumulator, +; IN PUCHAR BaseVa, +; IN ULONG ByteCount +; ) +public AccumulateChecksum +AccumulateChecksum proc + ; rcx: Accumulator + ; rdx: BaseVa + ; r8: ByteCount + ; rax: temporary accumulator + ; r9: scratch + + mov eax, [rcx] + +l64: + cmp r8, 64 ; eight qwords + jb l32 + + add rax, [rdx] + adc rax, [rdx + 8] + adc rax, [rdx + 16] + adc rax, [rdx + 24] + adc rax, [rdx + 32] + adc rax, [rdx + 40] + adc rax, [rdx + 48] + adc rax, [rdx + 56] + adc rax, 0 + + sub r8, 64 + add rdx, 64 + jmp l64 + +l32: + cmp r8, 32 ; four qwords + jb l16 + + add rax, [rdx] + adc rax, [rdx + 8] + adc rax, [rdx + 16] + adc rax, [rdx + 24] + adc rax, 0 + + sub r8, 32 + add rdx, 32 + +l16: + cmp r8, 16 ; two qwords + jb l8 + + add rax, [rdx] + adc rax, [rdx + 8] + adc rax, 0 + + sub r8, 16 + add rdx, 16 + +l8: + cmp r8, 8 ; one qword + jb l4 + + add rax, [rdx] + adc rax, 0 + + sub r8, 8 + add rdx, 8 + +l4: + cmp r8, 4 ; one dword + jb l2 + + mov r9d, dword ptr [rdx] + add rax, r9 + adc rax, 0 + + sub r8, 4 + add rdx, 4 + +l2: + cmp r8, 2 ; one word + jb l1 + + movzx r9d, word ptr [rdx] + add rax, r9 + adc rax, 0 + + sub r8, 2 + add rdx, 2 + +l1: + cmp r8, 1 ; last byte + jb l0 + + movzx r9d, byte ptr [rdx] + add rax, r9 + adc rax, 0 + +l0: + mov r9, rax + shr r9, 32 + add eax, r9d + adc eax, 0 + + mov r9d, eax + shr r9d, 16 + add ax, r9w + adc ax, 0 + + movzx eax, ax + + mov [rcx], eax + ret 
+AccumulateChecksum endp + +end diff --git a/src/xenvif/checksum.c b/src/xenvif/checksum.c index 4e8f2fc..a7181c6 100644 --- a/src/xenvif/checksum.c +++ b/src/xenvif/checksum.c @@ -35,6 +35,7 @@ #include <stdlib.h> #include <ethernet.h> #include <tcpip.h> +#include <intrin.h> #include <vif_interface.h> @@ -43,8 +44,9 @@ #include "assert.h" #include "util.h" -static FORCEINLINE VOID -__AccumulateChecksum( +#if !defined(_M_X64) +VOID +AccumulateChecksum( IN OUT PULONG Accumulator, IN PUCHAR BaseVa, IN ULONG ByteCount @@ -70,16 +72,7 @@ __AccumulateChecksum( *Accumulator = Current; } - -VOID -AccumulateChecksum( - IN OUT PULONG Accumulator, - IN PVOID BaseVa, - IN ULONG ByteCount - ) -{ - __AccumulateChecksum(Accumulator, BaseVa, ByteCount); -} +#endif BOOLEAN ChecksumVerify( @@ -96,7 +89,7 @@ ChecksumVerify( Accumulator &= 0xFFFF; // See RFC 1624, section 5 - __AccumulateChecksum(&Accumulator, (PUCHAR)&Embedded, sizeof (USHORT)); + AccumulateChecksum(&Accumulator, (PUCHAR)&Embedded, sizeof (USHORT)); return (Accumulator == 0xFFFF) ? TRUE : FALSE; } @@ -120,7 +113,7 @@ __ChecksumIpVersion4PseudoHeader( Header.Protocol = Protocol; Accumulator = 0; - __AccumulateChecksum(&Accumulator, (PUCHAR)&Header, sizeof (IPV4_PSEUDO_HEADER)); + AccumulateChecksum(&Accumulator, (PUCHAR)&Header, sizeof (IPV4_PSEUDO_HEADER)); // As-per RFC1624, Accumulator should never be 0. ASSERT(Accumulator != 0); @@ -161,7 +154,7 @@ __ChecksumIpVersion6PseudoHeader( Header.NextHeader = Protocol; Accumulator = 0; - __AccumulateChecksum(&Accumulator, (PUCHAR)&Header, sizeof (IPV6_PSEUDO_HEADER)); + AccumulateChecksum(&Accumulator, (PUCHAR)&Header, sizeof (IPV6_PSEUDO_HEADER)); // As-per RFC1624, Accumulator should never be 0. 
ASSERT(Accumulator != 0); @@ -250,14 +243,14 @@ ChecksumIpVersion4Header( Header->Checksum = 0; Accumulator = 0; - __AccumulateChecksum(&Accumulator, + AccumulateChecksum(&Accumulator, StartVa + Info->IpHeader.Offset, Info->IpHeader.Length); Header->Checksum = Saved; if (Info->IpOptions.Length != 0) - __AccumulateChecksum(&Accumulator, + AccumulateChecksum(&Accumulator, StartVa + Info->IpOptions.Offset, Info->IpOptions.Length); @@ -293,14 +286,14 @@ ChecksumTcpPacket( TcpHeader->Checksum = 0; Accumulator = PseudoHeaderChecksum; - __AccumulateChecksum(&Accumulator, + AccumulateChecksum(&Accumulator, StartVa + Info->TcpHeader.Offset, Info->TcpHeader.Length); TcpHeader->Checksum = Saved; if (Info->TcpOptions.Length != 0) - __AccumulateChecksum(&Accumulator, + AccumulateChecksum(&Accumulator, StartVa + Info->TcpOptions.Offset, Info->TcpOptions.Length); @@ -338,7 +331,7 @@ ChecksumTcpPacket( ByteCount -= Offset; ByteCount = __min(ByteCount, Length); - __AccumulateChecksum(&Accumulator, BaseVa, ByteCount); + AccumulateChecksum(&Accumulator, BaseVa, ByteCount); Length -= ByteCount; @@ -378,7 +371,7 @@ ChecksumUdpPacket( UdpHeader->Checksum = 0; Accumulator = PseudoHeaderChecksum; - __AccumulateChecksum(&Accumulator, + AccumulateChecksum(&Accumulator, StartVa + Info->UdpHeader.Offset, Info->UdpHeader.Length); @@ -417,7 +410,7 @@ ChecksumUdpPacket( ByteCount -= Offset; ByteCount = __min(ByteCount, Length); - __AccumulateChecksum(&Accumulator, BaseVa, ByteCount); + AccumulateChecksum(&Accumulator, BaseVa, ByteCount); Length -= ByteCount; diff --git a/vs2019/xenvif/xenvif.vcxproj b/vs2019/xenvif/xenvif.vcxproj index 0412426..80f37b6 100644 --- a/vs2019/xenvif/xenvif.vcxproj +++ b/vs2019/xenvif/xenvif.vcxproj @@ -88,6 +88,11 @@ <ClCompile Include="../../src/xenvif/controller.c" /> <ClCompile Include="../../src/xenvif/vif.c" /> </ItemGroup> + <ItemGroup> + <MASM Include="../../src/xenvif/amd64/checksum_amd64.asm"> + <ExcludedFromBuild 
Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild> + </MASM> + </ItemGroup> <ItemGroup> <ResourceCompile Include="..\..\src\xenvif\xenvif.rc" /> </ItemGroup> diff --git a/vs2022/xenvif/xenvif.vcxproj b/vs2022/xenvif/xenvif.vcxproj index be84232..8b7d910 100644 --- a/vs2022/xenvif/xenvif.vcxproj +++ b/vs2022/xenvif/xenvif.vcxproj @@ -80,6 +80,11 @@ <ClCompile Include="../../src/xenvif/controller.c" /> <ClCompile Include="../../src/xenvif/vif.c" /> </ItemGroup> + <ItemGroup> + <MASM Include="../../src/xenvif/amd64/checksum_amd64.asm"> + <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild> + </MASM> + </ItemGroup> <ItemGroup> <ResourceCompile Include="..\..\src\xenvif\xenvif.rc" /> </ItemGroup> -- 2.51.0.windows.1 -- Ngoc Tu Dinh | Vates XCP-ng Developer XCP-ng & Xen Orchestra - Vates solutions web: https://vates.tech
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |