The code below is something I wrote a while back for Linux - it finds the highest set bit, which I think is what you are asking for. It doesn't follow your exact specs, but should be easily adaptable.
Further notes:
- The return value is the zero-based index of the highest set bit, so a return of 0 means bit 0 is the highest (i.e. the only) set bit; if no bits are set at all, 64 is returned.
- This assembler is written for the calling convention used by GCC under Linux. I don't know how this differs under Mac OS X - you need to check.
- Input is a 64-bit unsigned int.
- Each CPU architecture is written into a separate .S source file and selectively compiled using 'gcc' depending on the target being built. I don't use inline assembler.
x86:
/*
 * x86_msb - find the highest set bit in a 64-bit bitboard (32-bit x86).
 *
 * Syntax: GAS, AT&T operand order.
 * In:     %eax = &bb (pointer to the 64-bit value, low dword first)
 * Out:    %eax = index (0..63) of the highest set bit, or 64 if bb == 0
 * Clobb:  %edx, flags
 */
        .globl  x86_msb
        .type   x86_msb, @function
x86_msb:
        /*
         * Load both halves up front. bsr leaves its destination undefined
         * when the source is zero, so &bb must not be needed from %eax
         * after the first bsr has executed.
         */
        movl    (%eax), %edx            /* edx = low dword of bb */
        movl    4(%eax), %eax           /* eax = high dword of bb */
        bsrl    %eax, %eax              /* ZF = 1 if the high dword is zero */
        jz      .Lx86_msb_low
        addl    $32, %eax               /* bit lives in the upper half */
        ret
.Lx86_msb_low:
        bsrl    %edx, %eax              /* ZF = 1 if the low dword is zero too */
        jz      .Lx86_msb_empty
        ret
.Lx86_msb_empty:
        movl    $64, %eax               /* bb is empty */
        ret
x86_64:
/*
 * x64_msb - offset of the highest set bit in a 64-bit bitmask (x86-64).
 *
 * Syntax: GAS, AT&T operand order.
 * In:     %rdi = &bb
 * Out:    %rax = index (0..63) of the highest set bit, or 64 if bb == 0
 * Clobb:  flags
 */
        .globl  x64_msb
        .type   x64_msb, @function
x64_msb:
        bsrq    (%rdi), %rax            /* scan bb in place; ZF = 1 if bb == 0 */
        jnz     .Lx64_msb_done
        movl    $64, %eax               /* empty mask; 32-bit write zero-extends */
.Lx64_msb_done:
        ret
Here are the Windows implementations (.asm file):
x86:
;;
;; @x86_msb@4 - offset of the highest set bit in a 64-bit bitmask (32-bit x86).
;;
;; Syntax: MASM, Intel operand order.
;; In:     ECX = &bb (pointer to the 64-bit value, low dword first)
;; Out:    EAX = index (0..63) of the highest set bit, or 64 if bb == 0
;; Clobb:  flags
;;
public @x86_msb@4
@x86_msb@4:
        bsr     eax, dword ptr [ecx + 4]        ; scan bb (high); ZF = 1 if it is zero
        jnz     msb_in_high
        bsr     eax, dword ptr [ecx]            ; scan bb (low); ZF = 1 if it is zero
        jnz     msb_done
        mov     eax, 64                         ; bb is empty
msb_done:
        ret
msb_in_high:
        add     eax, 32                         ; bit lives in the upper half
        ret
x86_64:
;;
;; x64_msb - offset of the highest set bit in a 64-bit bitmask (x86-64).
;;
;; Syntax: MASM, Intel operand order.
;; In:     RCX = &bb
;; Out:    RAX = index (0..63) of the highest set bit, or 64 if bb == 0
;; Clobb:  flags
;;
x64_msb PROC
        bsr     rax, qword ptr [rcx]    ; rax = msb(bb); ZF = 1 if bb == 0
        jnz     msb_found
        mov     eax, 64                 ; bb was empty; 32-bit write zero-extends
msb_found:
        ret
x64_msb ENDP