__asm {
// dot_nd = - ( normal.x * ray_dir.x + normal.y * ray_dir.y + normal.z * ray_dir.z );
// optimized version :
// xmm0 <--------------------------- 0
// xmm1
// xmm2
// xmm3 <--------------------------- ray->src
// xmm4 <--------------------------- ray->dir
// xmm5 <--------------------------- dot_nd
// xmm6 <--------------------------- t
// xmm7
xorps xmm0, xmm0 ;
mov ebx, dword ptr ray ;
mov edi, dword ptr state ;
mov esi, dword ptr this ;
movaps xmm3, xmmword ptr [ebx]e_Ray.src ;
movaps xmm4, xmmword ptr [ebx]e_Ray.dir ;
movaps xmm5, xmmword ptr [esi]this.normal ;
movaps xmm6, xmm5 ;
prefetchnta byte ptr [esi+TYPE this] ;
movaps xmm7, xmm4 ;
mulps xmm5, xmm7 ;
movhlps xmm7, xmm5 ;
addss xmm7, xmm5 ;
shufps xmm5, xmm5, 1 ;
addss xmm7, xmm5 ;
movaps xmm5, xmm0 ;
subss xmm5, xmm7 ;
// if( dot_nd <= 0.0f )
comiss xmm0, xmm5 ;
movss dword ptr dot_nd, xmm5 ;
// return false;
jae RETURN_FALSE ;
; 161 : t = ray_src.x * normal.x + ray_src.y * normal.y + ray_src.z * normal.z + normal.w;
movaps xmm7, xmm3 ;
mulps xmm7, xmm6 ;
movhlps xmm6, xmm7 ;
addss xmm6, xmm7 ;
shufps xmm7, xmm7, 253 ;
addss xmm6, xmm7 ;
movhlps xmm7, xmm7 ;
addss xmm6, xmm7 ;
// if( t <= t_near * dot_nd )
movss xmm7, dword ptr [ebx]e_Ray.t_near ;
mulss xmm7, xmm5 ;
comiss xmm7, xmm6 ;
movss dword ptr t, xmm6 ;
// return false;
jae RETURN_FALSE ;
// t_far = MIN( ray->t_far, state->t );
movss xmm7, dword ptr [edi]e_RayState.t ;
comiss xmm7, dword ptr [ebx]e_Ray.t_far ;
jbe CASE_BELOW ;
movss xmm7, dword ptr [ebx]e_Ray.t_far ;
CASE_BELOW:
// if( t >= t_far * dot_nd )
mulss xmm7, xmm5 ;
comiss xmm6, xmm7 ;
// return false;
jae RETURN_FALSE ;
// hit.arr[ projX ] = ray_src.arr[ projX ] * dot_nd + ray_dir.arr[ projX ] * t;
movzx eax, byte ptr [esi]this.projX ;
add eax, eax ;
add eax, eax ;
movss xmm1, dword ptr [ebx+eax]e_Ray.src ;
movss xmm2, dword ptr [ebx+eax]e_Ray.dir ;
mulss xmm1, xmm5 ;
mulss xmm2, xmm6 ;
addss xmm1, xmm2 ;
movss dword ptr [hit+eax], xmm1 ;
// hit.arr[ projY ] = ray_src.arr[ projY ] * dot_nd + ray_dir.arr[ projY ] * t;
movzx ecx, byte ptr [esi]this.projY ;
add ecx, ecx ;
add ecx, ecx ;
movss xmm1, dword ptr [ebx+ecx]e_Ray.src ;
movss xmm2, dword ptr [ebx+ecx]e_Ray.dir ;
mulss xmm1, xmm5 ;
mulss xmm2, xmm6 ;
addss xmm1, xmm2 ;
movss dword ptr [hit+ecx], xmm1 ;
// bary.x = hit.arr[ projX ] * la.x + hit.arr[ projY ] * la.y + la.z * dot_nd;
movss xmm7, dword ptr [esi]this.la.x ;
mulss xmm7, dword ptr [hit+eax] ;
movss xmm1, dword ptr [esi]this.la.y ;
mulss xmm1, dword ptr [hit+ecx] ;
addss xmm7, xmm1 ;
movss xmm1, dword ptr [esi]this.la.z ;
mulss xmm1, xmm5 ;
addss xmm7, xmm1 ;
// if( bary.x < 0.0f || bary.x > dot_nd )
comiss xmm0, xmm7 ;
ja RETURN_FALSE ;
comiss xmm7, xmm5 ;
ja RETURN_FALSE ;
// bary.y = hit.arr[ projX ] * lb.x + hit.arr[ projY ] * lb.y + lb.z * dot_nd;
movss xmm2, dword ptr [esi]this.lb.x ;
mulss xmm2, dword ptr [hit+eax] ;
movss xmm1, dword ptr [esi]this.lb.y ;
mulss xmm1, dword ptr [hit+ecx] ;
addss xmm2, xmm1 ;
movss xmm1, dword ptr [esi]this.lb.z ;
mulss xmm1, xmm5 ;
addss xmm2, xmm1 ;
// if( bary.y < 0.0f || bary.y > dot_nd )
comiss xmm0, xmm2 ;
ja RETURN_FALSE ;
comiss xmm2, xmm5 ;
ja RETURN_FALSE ;
// bary.z = dot_nd - bary.x - bary.y;
movaps xmm1, xmm5 ;
subss xmm1, xmm7 ;
subss xmm1, xmm2 ;
// if( bary.z < 0.0f || bary.z > dot_nd )
comiss xmm0, xmm1 ;
ja RETURN_FALSE ;
comiss xmm1, xmm5 ;
ja RETURN_FALSE ;
movss dword ptr bary.x, xmm7 ;
movss dword ptr bary.y, xmm2 ;
movss dword ptr bary.z, xmm1 ;
} // __asm
// dot_nd = - ( normal.x * ray_dir.x + normal.y * ray_dir.y + normal.z * ray_dir.z );
// optimized version :
// xmm0 <--------------------------- 0
// xmm1
// xmm2
// xmm3 <--------------------------- ray->src
// xmm4 <--------------------------- ray->dir
// xmm5 <--------------------------- dot_nd
// xmm6 <--------------------------- t
// xmm7
xorps xmm0, xmm0 ;
mov ebx, dword ptr ray ;
mov edi, dword ptr state ;
mov esi, dword ptr this ;
movaps xmm3, xmmword ptr [ebx]e_Ray.src ;
movaps xmm4, xmmword ptr [ebx]e_Ray.dir ;
movaps xmm5, xmmword ptr [esi]this.normal ;
movaps xmm6, xmm5 ;
prefetchnta byte ptr [esi+TYPE this] ;
movaps xmm7, xmm4 ;
mulps xmm5, xmm7 ;
movhlps xmm7, xmm5 ;
addss xmm7, xmm5 ;
shufps xmm5, xmm5, 1 ;
addss xmm7, xmm5 ;
movaps xmm5, xmm0 ;
subss xmm5, xmm7 ;
// if( dot_nd <= 0.0f )
comiss xmm0, xmm5 ;
movss dword ptr dot_nd, xmm5 ;
// return false;
jae RETURN_FALSE ;
; 161 : t = ray_src.x * normal.x + ray_src.y * normal.y + ray_src.z * normal.z + normal.w;
movaps xmm7, xmm3 ;
mulps xmm7, xmm6 ;
movhlps xmm6, xmm7 ;
addss xmm6, xmm7 ;
shufps xmm7, xmm7, 253 ;
addss xmm6, xmm7 ;
movhlps xmm7, xmm7 ;
addss xmm6, xmm7 ;
// if( t <= t_near * dot_nd )
movss xmm7, dword ptr [ebx]e_Ray.t_near ;
mulss xmm7, xmm5 ;
comiss xmm7, xmm6 ;
movss dword ptr t, xmm6 ;
// return false;
jae RETURN_FALSE ;
// t_far = MIN( ray->t_far, state->t );
movss xmm7, dword ptr [edi]e_RayState.t ;
comiss xmm7, dword ptr [ebx]e_Ray.t_far ;
jbe CASE_BELOW ;
movss xmm7, dword ptr [ebx]e_Ray.t_far ;
CASE_BELOW:
// if( t >= t_far * dot_nd )
mulss xmm7, xmm5 ;
comiss xmm6, xmm7 ;
// return false;
jae RETURN_FALSE ;
// hit.arr[ projX ] = ray_src.arr[ projX ] * dot_nd + ray_dir.arr[ projX ] * t;
movzx eax, byte ptr [esi]this.projX ;
add eax, eax ;
add eax, eax ;
movss xmm1, dword ptr [ebx+eax]e_Ray.src ;
movss xmm2, dword ptr [ebx+eax]e_Ray.dir ;
mulss xmm1, xmm5 ;
mulss xmm2, xmm6 ;
addss xmm1, xmm2 ;
movss dword ptr [hit+eax], xmm1 ;
// hit.arr[ projY ] = ray_src.arr[ projY ] * dot_nd + ray_dir.arr[ projY ] * t;
movzx ecx, byte ptr [esi]this.projY ;
add ecx, ecx ;
add ecx, ecx ;
movss xmm1, dword ptr [ebx+ecx]e_Ray.src ;
movss xmm2, dword ptr [ebx+ecx]e_Ray.dir ;
mulss xmm1, xmm5 ;
mulss xmm2, xmm6 ;
addss xmm1, xmm2 ;
movss dword ptr [hit+ecx], xmm1 ;
// bary.x = hit.arr[ projX ] * la.x + hit.arr[ projY ] * la.y + la.z * dot_nd;
movss xmm7, dword ptr [esi]this.la.x ;
mulss xmm7, dword ptr [hit+eax] ;
movss xmm1, dword ptr [esi]this.la.y ;
mulss xmm1, dword ptr [hit+ecx] ;
addss xmm7, xmm1 ;
movss xmm1, dword ptr [esi]this.la.z ;
mulss xmm1, xmm5 ;
addss xmm7, xmm1 ;
// if( bary.x < 0.0f || bary.x > dot_nd )
comiss xmm0, xmm7 ;
ja RETURN_FALSE ;
comiss xmm7, xmm5 ;
ja RETURN_FALSE ;
// bary.y = hit.arr[ projX ] * lb.x + hit.arr[ projY ] * lb.y + lb.z * dot_nd;
movss xmm2, dword ptr [esi]this.lb.x ;
mulss xmm2, dword ptr [hit+eax] ;
movss xmm1, dword ptr [esi]this.lb.y ;
mulss xmm1, dword ptr [hit+ecx] ;
addss xmm2, xmm1 ;
movss xmm1, dword ptr [esi]this.lb.z ;
mulss xmm1, xmm5 ;
addss xmm2, xmm1 ;
// if( bary.y < 0.0f || bary.y > dot_nd )
comiss xmm0, xmm2 ;
ja RETURN_FALSE ;
comiss xmm2, xmm5 ;
ja RETURN_FALSE ;
// bary.z = dot_nd - bary.x - bary.y;
movaps xmm1, xmm5 ;
subss xmm1, xmm7 ;
subss xmm1, xmm2 ;
// if( bary.z < 0.0f || bary.z > dot_nd )
comiss xmm0, xmm1 ;
ja RETURN_FALSE ;
comiss xmm1, xmm5 ;
ja RETURN_FALSE ;
movss dword ptr bary.x, xmm7 ;
movss dword ptr bary.y, xmm2 ;
movss dword ptr bary.z, xmm1 ;
} // __asm