/*
 * bitxor: read fixed-size blocks from stdin and write the bitwise XOR of
 * every pair of consecutive blocks to stdout.
 *
 * NOTE(review): the text this file was restored from had every <...> span
 * stripped (header names, the inner loop and tail of bitxor_buf(), the tail
 * of main()) and was collapsed onto a single line.  Spans marked
 * "reconstructed" below were rebuilt from the surviving call sites and
 * comments; confirm them against the original source.
 */

/* posix_memalign() is only declared under strict -std=c11 when requested. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200112L
#endif

/*
 * NOTE(review): the original header names were lost; this is the minimal
 * set the code below needs.
 */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <emmintrin.h>

typedef __m128i vector; /* 128-bit SSE2 integer vector */

/*
 * Store the bitwise XOR of *a and *b into *c.
 *
 * static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so a non-inlined call could fail to link.
 */
static inline void bitxor_vdiff(const vector *a, const vector *b, vector *c)
{
    *c = _mm_xor_si128(*a, *b);
}

/* A 128-bit vector viewed either as an SSE register or as two 64-bit words. */
union vec128 {
    vector v;
    struct {
        uint64_t a;
        uint64_t b;
    } data;
};

/*
 * XOR each block of ibuf with the block that follows it.
 *
 * ibuf   input buffer holding size / bsize consecutive blocks
 * obuf   output buffer; block i receives ibuf[block i] ^ ibuf[block i+1]
 * size   number of valid bytes in ibuf
 * bsize  block size in bytes
 *
 * Blocks are accessed as 16-byte vectors, so both buffers and bsize must be
 * 16-byte aligned/multiples; bytes of a block beyond the last whole vector
 * are not processed.
 *
 * Returns the number of bytes written to obuf: (size / bsize - 1) * bsize,
 * or 0 when bsize is not positive or fewer than two blocks are available.
 */
int bitxor_buf(const unsigned char *ibuf, unsigned char *obuf, int size, int bsize)
{
    if (bsize <= 0 || size / bsize < 2) {
        return 0;
    }

    const int numblocks = size / bsize;
    const int numvec = bsize / (int)sizeof(vector);

    for (int i = 0; i < numblocks - 1; i++) {
        const union vec128 *ba = (const union vec128 *)&ibuf[i * bsize];
        const union vec128 *bb = (const union vec128 *)&ibuf[(i + 1) * bsize];
        union vec128 *bc = (union vec128 *)&obuf[i * bsize];

        /* NOTE(review): reconstructed -- the original loop body was lost. */
        for (int j = 0; j < numvec; j++) {
            bitxor_vdiff(&ba[j].v, &bb[j].v, &bc[j].v);
        }
    }

    /*
     * NOTE(review): reconstructed -- main() expects the return value to be
     * the input size minus one block.
     */
    return (numblocks - 1) * bsize;
}

#define BUF_NUMBLOCKS 1024

/*
 * Parse a block size: a positive decimal integer that is a multiple of the
 * vector size and small enough that blocksize * BUF_NUMBLOCKS fits in int.
 * Returns 0 and stores the value in *out on success, -1 otherwise.
 */
static int parse_blocksize(const char *arg, int *out)
{
    char *end = NULL;

    errno = 0;
    long value = strtol(arg, &end, 10);
    if (end == arg || *end != '\0' || errno == ERANGE) {
        return -1;
    }
    if (value <= 0 || value > INT_MAX / BUF_NUMBLOCKS) {
        return -1;
    }
    if (value % (long)sizeof(vector) != 0) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Allocate a page-aligned buffer of `size` bytes; returns NULL (after
 * printing a diagnostic) on failure.  posix_memalign() reports failure
 * through its return value and does not set errno, so perror() is not used.
 */
static unsigned char *alloc_buffer(size_t size)
{
    void *mem = NULL;
    int err = posix_memalign(&mem, 4096, size);

    if (err != 0) {
        fprintf(stderr, "ERROR: posix_malloc: %s\n", strerror(err));
        return NULL;
    }
    return mem;
}

/* USAGE: bitxor <blocksize> */
int main(int argc, char **argv)
{
    int status = -1;
    int blocksize = 0;
    int bufsize = 0;
    int bufsize_read = 0;
    unsigned char *ibuf = NULL;
    unsigned char *obuf = NULL;
    unsigned char *ibuf_read = NULL;
    size_t got;

    if (argc < 2) {
        fprintf(stderr, "USAGE: bitxor <blocksize>\n");
        return -1;
    }
    if (parse_blocksize(argv[1], &blocksize) != 0) {
        fprintf(stderr,
                "ERROR: blocksize must be a positive multiple of %d, at most %d\n",
                (int)sizeof(vector), INT_MAX / BUF_NUMBLOCKS);
        return -1;
    }

    bufsize = blocksize * BUF_NUMBLOCKS;
    ibuf = alloc_buffer((size_t)bufsize);
    obuf = alloc_buffer((size_t)bufsize);
    if (ibuf == NULL || obuf == NULL) {
        goto out;
    }

    /* The first read fills the whole buffer; later reads leave room for the
     * carried-over block at the front. */
    ibuf_read = ibuf;
    bufsize_read = bufsize;

    if (freopen(NULL, "rb", stdin) == NULL) {
        perror("ERROR: freopen");
        goto out;
    }

    while ((got = fread(ibuf_read, 1, (size_t)bufsize_read, stdin)) > 0) {
        int n = (int)got;
        const int partial = n % blocksize;

        if (partial) {
            fprintf(stderr, "ERROR: n=%d fread not a multiple of blocksize=%d\n",
                    n, blocksize);
            n -= partial; /* only whole blocks can be processed */
        }
        if (ibuf != ibuf_read) {
            n += blocksize; /* account for the block carried over in front */
        }
        if (n < blocksize) {
            break; /* not even one whole block available */
        }

        int rc = bitxor_buf(ibuf, obuf, n, blocksize);
        if (rc != n - blocksize) {
            fprintf(stderr, "n=%d but only did %d bytes (blocksize=%d)\n",
                    n, rc, blocksize);
        }

        size_t p = fwrite(obuf, 1, (size_t)rc, stdout);
        if (p != (size_t)rc) {
            perror("write");
            goto out;
        }

        if (partial) {
            break; /* a short read means end of input or a read error */
        }

        /*
         * Copy over the last block for bitxor with the next block.
         * NOTE(review): reconstructed -- the original tail of main() was lost.
         */
        memmove(ibuf, ibuf + n - blocksize, (size_t)blocksize);
        ibuf_read = ibuf + blocksize;
        bufsize_read = bufsize - blocksize;
    }

    if (ferror(stdin)) {
        perror("ERROR: fread");
        goto out;
    }
    if (fflush(stdout) != 0) {
        perror("write");
        goto out;
    }
    status = 0;

out:
    free(obuf);
    free(ibuf);
    return status;
}