int diff = 0;
for (int i = 0; i < dim / 64; i++)
{
unsigned__int64 x1 = ((unsigned__int64 *)px1)[i];
unsigned__int64 x2 = ((unsigned__int64 *)px2)[i];
unsigned__int64 x = x1 ^ x2;
diff += (int)_mm_popcnt_u64(x);
}
int diff = 0;
for (int i = 0; i < len; i++)
{
ulong t = a[i] ^ b[i];
t -= ((t >> 1)& 0x5555555555555555UL);
t = (t & 0x3333333333333333UL) + ((t >> 2) & 0x3333333333333333UL);
t = ((t + (t >> 4) & 0x0F0F0F0F0F0F0F0FUL) * 0x0101010101010101UL) >> 56;
diff += (int)t;
}
#if !defined HAVE_BITCOUNT_H__
#define HAVE_BITCOUNT_H__
// This file is part of the FXT library.
// Copyright (C) 2010, 2011, 2012 Joerg Arndt
// License: GNU General Public License version 3 or later,
// see the file COPYING.txt in the main directory.
#include "fxttypes.h"
#include "bits/bitsperlong.h"
#include "bits/bitasm.h"
static inline ulong bit_count(ulong x)
// Return number of set bits.
// The sequence of values returned for x = 0, 1, 2, 3, ... is
// 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, ...
// (OEIS sequence A000120).
{
#ifdef HAVE_AMD64_POPCNT // currently not auto-detected
return asm_bit_count(x);
#else // HAVE_AMD64_POPCNT
#if BITS_PER_LONG == 32
// // VERSION 1:
// x = (0x55555555UL & x) + (0x55555555UL & (x>> 1)); // 0-2 in 2 bits
// x = (0x33333333UL & x) + (0x33333333UL & (x>> 2)); // 0-4 in 4 bits
// x = (0x0f0f0f0fUL & x) + (0x0f0f0f0fUL & (x>> 4)); // 0-8 in 8 bits
// x = (0x00ff00ffUL & x) + (0x00ff00ffUL & (x>> 8)); // 0-16 in 16 bits
// x = (0x0000ffffUL & x) + (0x0000ffffUL & (x>>16)); // 0-32 in 32 bits
// return x;
// // VERSION 2:
// x = ((x>>1) & 0x55555555UL) + (x & 0x55555555UL); // 0-2 in 2 bits
// x = ((x>>2) & 0x33333333UL) + (x & 0x33333333UL); // 0-4 in 4 bits
// x = ((x>>4) + x) & 0x0f0f0f0fUL; // 0-8 in 8 bits
// x += x>> 8; // 0-16 in 8 bits
// x += x>>16; // 0-32 in 8 bits
// return x & 0xff;
// VERSION 3:
x -= (x>>1) & 0x55555555UL; // 0-2 in 2 bits
x = ((x>>2) & 0x33333333UL) + (x & 0x33333333UL); // 0-4 in 4 bits
x = ((x>>4) + x) & 0x0f0f0f0fUL; // 0-8 in 8 bits
x *= 0x01010101UL;
return x>>24;
#endif
#if BITS_PER_LONG == 64
// // VERSION 1:
// x = (0x5555555555555555UL & x) + (0x5555555555555555UL & (x>> 1)); // 0-2 in 2 bits
// x = (0x3333333333333333UL & x) + (0x3333333333333333UL & (x>> 2)); // 0-4 in 4 bits
// x = (0x0f0f0f0f0f0f0f0fUL & x) + (0x0f0f0f0f0f0f0f0fUL & (x>> 4)); // 0-8 in 8 bits
// x = (0x00ff00ff00ff00ffUL & x) + (0x00ff00ff00ff00ffUL & (x>> 8)); // 0-16 in 16 bits
// x = (0x0000ffff0000ffffUL & x) + (0x0000ffff0000ffffUL & (x>>16)); // 0-32 in 32 bits
// x = (0x00000000ffffffffUL & x) + (0x00000000ffffffffUL & (x>>32)); // 0-64 in 64 bits
// return x;
// // VERSION 2:
// x = ((x>>1) & 0x5555555555555555UL) + (x & 0x5555555555555555UL); // 0-2 in 2 bits
// x = ((x>>2) & 0x3333333333333333UL) + (x & 0x3333333333333333UL); // 0-4 in 4 bits
// x = ((x>>4) + x) & 0x0f0f0f0f0f0f0f0fUL; // 0-8 in 8 bits
// x += x>> 8; // 0-16 in 8 bits
// x += x>>16; // 0-32 in 8 bits
// x += x>>32; // 0-64 in 8 bits
// return x & 0xff;
// VERSION 3:
x -= (x>>1) & 0x5555555555555555UL; // 0-2 in 2 bits
x = ((x>>2) & 0x3333333333333333UL) + (x & 0x3333333333333333UL); // 0-4 in 4 bits
x = ((x>>4) + x) & 0x0f0f0f0f0f0f0f0fUL; // 0-8 in 8 bits
x *= 0x0101010101010101UL;
return x>>56;
#endif
#endif // HAVE_AMD64_POPCNT
}
// -------------------------
static inline ulong bit_count_15(ulong x)
// Return number of set bits, must have at most 15 set bits.
{
#if BITS_PER_LONG == 32
x -= (x>>1) & 0x55555555UL; // 0-2 in 2 bits
x = ((x>>2) & 0x33333333UL) + (x & 0x33333333UL); // 0-4 in 4 bits
x *= 0x11111111UL;
return x>>28;
#endif
#if BITS_PER_LONG == 64
x -= (x>>1) & 0x5555555555555555UL; // 0-2 in 2 bits
x = ((x>>2) & 0x3333333333333333UL) + (x & 0x3333333333333333UL); // 0-4 in 4 bits
x *= 0x1111111111111111UL;
return x>>60;
#endif
}
// -------------------------
static inline ulong bit_count_3(ulong x)
// Return number of set bits, must have at most 3 set bits.
{
#if BITS_PER_LONG == 32
x -= (x>>1) & 0x55555555UL; // 0-2 in 2 bits
x *= 0x55555555UL;
return x>>30;
#endif
#if BITS_PER_LONG == 64
x -= (x>>1) & 0x5555555555555555UL; // 0-2 in 2 bits
x *= 0x5555555555555555UL;
return x>>62;
#endif
}
// -------------------------
static inline int bit_count_cmp(const ulong &a, const ulong &b)
// Compare bit counts of a and b.
{
ulong ca = bit_count(a);
ulong cb = bit_count(b);
return ( ca==cb ? 0 : (ca>cb ? +1 : -1) );
}
// -------------------------
static inline ulong bit_count_sparse(ulong x)
// Return number of bits set.
{
#if 0
// The loop will execute once for each bit of x set:
ulong n = 0;
while ( x ) { ++n; x &= (x-1); }
return n;
#else
// The loop will execute ceil(c/4) if c bits of x are set:
ulong n = 0;
do
{
n += (x!=0); x &= (x-1);
n += (x!=0); x &= (x-1);
n += (x!=0); x &= (x-1);
n += (x!=0); x &= (x-1);
}
while ( x );
return n;
#endif
}
// -------------------------
static inline ulong bit_count_dense(ulong x)
// Return number of bits set.
// The loop (of bit_count_sparse()) will execute once for
// each unset bit (i.e. zero) of x.
{
return BITS_PER_LONG - bit_count_sparse( ~x );
}
// -------------------------
static inline ulong bit_block_count(ulong x)
// Return number of bit blocks.
// E.g.:
// ..1..11111...111. -> 3
// ...1..11111...111 -> 3
// ......1.....1.1.. -> 3
// .........111.1111 -> 2
{
// return bit_count(gray_code(x))/2 + (x & 1);
return (x & 1) + bit_count( (x^(x>>1)) ) / 2;
}
// -------------------------
static inline ulong bit_block_ge2_count(ulong x)
// Return number of bit blocks with at least 2 bits.
// E.g.:
// ..1..11111...111. -> 2
// ...1..11111...111 -> 2
// ......1.....1.1.. -> 0
// .........111.1111 -> 2
{
// return bit_block_count( interior_ones(x) );
return bit_block_count( x & ( (x<<1) & (x>>1) ) );
}
// -------------------------
static inline ulong bit_count_01(ulong x)
// Return number of bits in a word
// for words of the special form 00...0001...11
{
#if defined BITS_USE_ASM
if ( 1>=x ) return x;
x = asm_bsr(x);
return x + 1;
#else // BITS_USE_ASM
ulong ct = 0;
ulong a;
#if BITS_PER_LONG == 64
a = (x & (1UL<<32)) >> (32-5); // test bit 32
x >>= a; ct += a;
#endif
a = (x & (1UL<<16)) >> (16-4); // test bit 16
x >>= a; ct += a;
a = (x & (1UL<<8)) >> (8-3); // test bit 8
x >>= a; ct += a;
a = (x & (1UL<<4)) >> (4-2); // test bit 4
x >>= a; ct += a;
a = (x & (1UL<<2)) >> (2-1); // test bit 2
x >>= a; ct += a;
a = (x & (1UL<<1)) >> (1-0); // test bit 1
x >>= a; ct += a;
ct += x & 1; // test bit 0
return ct;
#endif // BITS_USE_ASM
}
// -------------------------
// bits/bitcount-v.cc:
ulong bit_count_v(const ulong *x, ulong n);
#endif // !defined HAVE_BITCOUNT_H__