From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 16 Apr 2005 15:20:36 -0700
Subject: Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
---
 arch/ia64/lib/ip_fast_csum.S | 108 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 arch/ia64/lib/ip_fast_csum.S

(limited to 'arch/ia64/lib/ip_fast_csum.S')

diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
new file mode 100644
index 000000000000..19674ca2acfc
--- /dev/null
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -0,0 +1,108 @@
+/*
+ * Optimized version of the ip_fast_csum() function
+ * Used for calculating IP header checksum
+ *
+ * Return: 16bit checksum, complemented
+ *
+ * Inputs:
+ *      in0: address of buffer to checksum (char *)
+ *      in1: length of the buffer in 32-bit words (int)
+ *
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Ken Chen
+ */
+
+#include <asm/asmmacro.h>
+
+/*
+ * Since we know that most likely this function is called with buf aligned
+ * on 4-byte boundary and 20 bytes in length, we can execute rather quickly
+ * versus calling generic version of do_csum, which has lots of overhead in
+ * handling various alignments and sizes.  However, due to lack of constraints
+ * put on the function input argument, cases with alignment not on 4-byte or
+ * size not equal to 20 bytes will be handled by the generic do_csum function.
+ */
+
+#define in0     r32
+#define in1     r33
+#define ret0    r8
+
+/*
+ * Fast path (p7 set): in1 == 5 and in0 is 4-byte aligned.  The five 32-bit
+ * words are loaded through two interleaved pointers (in0 and r15 = in0 + 4,
+ * each post-incremented by 8), summed in r20, folded down to 16 bits and
+ * complemented.
+ *
+ * Slow path (p6 set): every other case calls do_csum() with in0 and
+ * in1 << 2, then complements the value it returns.
+ *
+ * Both paths mask the complemented sum with 0xffff so that ret0 holds only
+ * the 16-bit checksum, with bits 16-63 clear.
+ *
+ * Besides ret0, the code here writes in0, r9, r14, r15, r20-r24, p6 and p7;
+ * the slow path additionally uses r34/r35 to hold b0 and ar.pfs across the call.
+ */
+GLOBAL_ENTRY(ip_fast_csum)
+        .prologue
+        .body
+        cmp.ne  p6,p7=5,in1             // size other than 20 byte?
+        and     r14=3,in0               // is it aligned on 4-byte?
+        add     r15=4,in0               // second source pointer
+        ;;
+        cmp.ne.or.andcm p6,p7=r14,r0    // misaligned: force p6=1, p7=0
+        ;;
+(p7)    ld4     r20=[in0],8             // word 0, in0 -> word 2
+(p7)    ld4     r21=[r15],8             // word 1, r15 -> word 3
+(p6)    br.spnt .generic
+        ;;
+        ld4     r22=[in0],8             // word 2, in0 -> word 4
+        ld4     r23=[r15],8             // word 3
+        ;;
+        ld4     r24=[in0]               // word 4
+        add     r20=r20,r21
+        add     r22=r22,r23
+        ;;
+        add     r20=r20,r22
+        ;;
+        add     r20=r20,r24             // r20 = 64-bit sum of the five words
+        ;;
+        shr.u   ret0=r20,16             // now need to add the carry
+        zxt2    r20=r20
+        ;;
+        add     r20=ret0,r20
+        ;;
+        shr.u   ret0=r20,16             // add carry again
+        zxt2    r20=r20
+        ;;
+        add     r20=ret0,r20
+        ;;
+        shr.u   ret0=r20,16             // third fold: result fits in 16 bits
+        zxt2    r20=r20
+        ;;
+        add     r20=ret0,r20
+        mov     r9=0xffff               // mask for the 16-bit result
+        ;;
+        andcm   ret0=r9,r20             // ret0 = 0xffff & ~sum
+        .restore sp                     // reset frame state
+        br.ret.sptk.many b0
+        ;;
+
+.generic:
+        .prologue
+        .save ar.pfs, r35
+        alloc   r35=ar.pfs,2,2,2,0
+        .save rp, r34
+        mov     r34=b0
+        .body
+        dep.z   out1=in1,2,30           // out1 = in1 * 4 (words -> bytes)
+        mov     out0=in0
+        ;;
+        br.call.sptk.many b0=do_csum
+        ;;
+        mov     r9=0xffff               // loaded after the call returns
+        mov     ar.pfs=r35
+        mov     b0=r34
+        ;;
+        andcm   ret0=r9,ret0            // ret0 = 0xffff & ~do_csum()
+        br.ret.sptk.many b0
+END(ip_fast_csum)
--
cgit v1.2.1