diff options
Diffstat (limited to 'VRCSDK3Worlds/Assets/Editor/x64/Bakery/lodselect.ptx')
-rw-r--r-- | VRCSDK3Worlds/Assets/Editor/x64/Bakery/lodselect.ptx | 1927 |
1 files changed, 1927 insertions, 0 deletions
diff --git a/VRCSDK3Worlds/Assets/Editor/x64/Bakery/lodselect.ptx b/VRCSDK3Worlds/Assets/Editor/x64/Bakery/lodselect.ptx new file mode 100644 index 00000000..0c2cf561 --- /dev/null +++ b/VRCSDK3Worlds/Assets/Editor/x64/Bakery/lodselect.ptx @@ -0,0 +1,1927 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-23083092 +// Cuda compilation tools, release 9.1, V9.1.85 +// Based on LLVM 3.4svn +// + +.version 6.1 +.target sm_30 +.address_size 64 + + // .globl _Z6oxMainv +.global .align 8 .b8 pixelID[8]; +.global .align 8 .b8 resolution[8]; +.global .align 4 .b8 normal[12]; +.global .align 4 .b8 camPos[12]; +.global .align 4 .b8 root[4]; +.global .align 4 .u32 imageEnabled; +.global .texref lightmap; +.global .align 16 .b8 tileInfo[16]; +.global .align 4 .u32 additive; +.global .align 1 .b8 image_HDR[1]; +.global .align 1 .b8 image_HDR2[1]; +.global .align 8 .b8 texCoords[8]; +.global .align 1 .b8 uvpos[1]; +.global .align 1 .b8 uvnormal[1]; +.global .align 1 .b8 rnd_seeds[1]; +.global .align 1 .b8 lmidLODs[1]; +.global .align 4 .b8 _ZN21rti_internal_typeinfo7pixelIDE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo10resolutionE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo6normalE[8] = {82, 97, 121, 0, 12, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo6camPosE[8] = {82, 97, 121, 0, 12, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo4rootE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo12imageEnabledE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo8tileInfoE[8] = {82, 97, 121, 0, 16, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo8additiveE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo9texCoordsE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.global .align 8 .b8 _ZN21rti_internal_typename7pixelIDE[6] = {117, 105, 110, 116, 50, 0}; +.global .align 8 .b8 _ZN21rti_internal_typename10resolutionE[6] = {117, 105, 110, 116, 50, 0}; +.global .align 8 .b8 _ZN21rti_internal_typename6normalE[7] = {102, 108, 111, 97, 116, 51, 0}; +.global .align 8 .b8 _ZN21rti_internal_typename6camPosE[7] = {102, 108, 111, 97, 116, 51, 0}; +.global .align 16 .b8 _ZN21rti_internal_typename4rootE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0}; +.global .align 4 .b8 _ZN21rti_internal_typename12imageEnabledE[4] = {105, 110, 116, 0}; +.global .align 8 .b8 _ZN21rti_internal_typename8tileInfoE[6] = {117, 105, 110, 116, 52, 0}; +.global .align 4 .b8 _ZN21rti_internal_typename8additiveE[4] = {105, 110, 116, 0}; +.global .align 8 .b8 _ZN21rti_internal_typename9texCoordsE[7] = {102, 108, 111, 97, 116, 50, 0}; +.global .align 4 .u32 _ZN21rti_internal_typeenum7pixelIDE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum10resolutionE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum6normalE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum6camPosE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum4rootE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum12imageEnabledE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum8tileInfoE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum8additiveE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum9texCoordsE = 4919; +.global .align 16 .b8 _ZN21rti_internal_semantic7pixelIDE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0}; +.global .align 16 .b8 _ZN21rti_internal_semantic10resolutionE[12] = {114, 116, 76, 97, 117, 110, 99, 104, 68, 105, 109, 0}; +.global .align 16 .b8 _ZN21rti_internal_semantic6normalE[17] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 110, 111, 114, 109, 97, 108, 0}; +.global .align 1 .b8 _ZN21rti_internal_semantic6camPosE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic4rootE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic12imageEnabledE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic8tileInfoE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic8additiveE[1]; +.global .align 16 .b8 _ZN21rti_internal_semantic9texCoordsE[20] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 116, 101, 120, 67, 111, 111, 114, 100, 115, 0}; +.global .align 1 .b8 _ZN23rti_internal_annotation7pixelIDE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation10resolutionE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation6normalE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation6camPosE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation4rootE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation12imageEnabledE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation8tileInfoE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation8additiveE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation9texCoordsE[1]; +.const .align 4 .b8 __cudart_i2opi_f[24] = {65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162}; + +.visible .entry _Z6oxMainv( + +) +{ + .local .align 4 .b8 __local_depot0[40]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .pred %p<120>; + .reg .b16 %rs<9>; + .reg .f32 %f<646>; + .reg .b32 %r<802>; + .reg .b64 %rd<183>; + + + mov.u64 %rd182, __local_depot0; + cvta.local.u64 %SP, %rd182; + ld.global.v2.u32 {%r295, %r296}, [pixelID]; + cvt.u64.u32 %rd53, %r295; + cvt.u64.u32 %rd54, %r296; + mov.u64 %rd57, uvnormal; + cvta.global.u64 %rd52, %rd57; + mov.u32 %r293, 2; + mov.u32 %r294, 4; + mov.u64 %rd56, 0; + // inline asm + call (%rd51), _rt_buffer_get_64, (%rd52, %r293, %r294, %rd53, %rd54, %rd56, %rd56); + // inline asm + ld.u32 %r1, [%rd51]; + shr.u32 %r299, %r1, 16; + cvt.u16.u32 %rs1, %r299; + and.b16 %rs2, %rs1, 255; + cvt.u16.u32 %rs3, %r1; + or.b16 %rs4, %rs3, %rs2; + setp.eq.s16 %p1, %rs4, 0; + mov.f32 %f595, 0f00000000; + mov.f32 %f596, %f595; + mov.f32 %f597, %f595; + @%p1 bra BB0_2; + + ld.u8 %rs5, [%rd51+1]; + and.b16 %rs7, %rs3, 255; + cvt.rn.f32.u16 %f170, %rs7; + div.rn.f32 %f171, %f170, 0f437F0000; + fma.rn.f32 %f172, %f171, 0f40000000, 0fBF800000; + cvt.rn.f32.u16 %f173, %rs5; + div.rn.f32 %f174, %f173, 0f437F0000; + fma.rn.f32 %f175, %f174, 0f40000000, 0fBF800000; + cvt.rn.f32.u16 %f176, %rs2; + div.rn.f32 %f177, %f176, 0f437F0000; + fma.rn.f32 %f178, %f177, 0f40000000, 0fBF800000; + mul.f32 %f179, %f175, %f175; + fma.rn.f32 %f180, %f172, %f172, %f179; + fma.rn.f32 %f181, %f178, %f178, %f180; + sqrt.rn.f32 %f182, %f181; + rcp.rn.f32 %f183, %f182; + mul.f32 %f595, %f172, %f183; + mul.f32 %f596, %f175, %f183; + mul.f32 %f597, %f178, %f183; + +BB0_2: + setp.eq.f32 %p2, %f596, 0f00000000; + setp.eq.f32 %p3, %f595, 0f00000000; + and.pred %p4, %p3, %p2; + setp.eq.f32 %p5, %f597, 0f00000000; + and.pred %p6, %p4, %p5; + @%p6 bra BB0_199; + + ld.global.v2.u32 {%r304, %r305}, [pixelID]; + cvt.u64.u32 %rd60, %r304; + cvt.u64.u32 %rd61, %r305; + mov.u64 %rd70, uvpos; + cvta.global.u64 %rd59, %rd70; + mov.u32 %r301, 12; + // inline asm + call (%rd58), _rt_buffer_get_64, (%rd59, %r293, %r301, %rd60, %rd61, %rd56, %rd56); + // inline asm + ld.f32 %f184, [%rd58+8]; + ld.f32 %f185, [%rd58+4]; + ld.f32 %f186, [%rd58]; + mul.f32 %f187, %f186, 0f3456BF95; + mul.f32 %f188, %f185, 0f3456BF95; + mul.f32 %f189, %f184, 0f3456BF95; + abs.f32 %f190, %f595; + div.rn.f32 %f191, %f187, %f190; + abs.f32 %f192, %f596; + div.rn.f32 %f193, %f188, %f192; + abs.f32 %f194, %f597; + div.rn.f32 %f195, %f189, %f194; + abs.f32 %f196, %f191; + abs.f32 %f197, %f193; + abs.f32 %f198, %f195; + mov.f32 %f199, 0f38D1B717; + max.f32 %f200, %f196, %f199; + max.f32 %f201, %f197, %f199; + max.f32 %f202, %f198, %f199; + fma.rn.f32 %f7, %f595, %f200, %f186; + fma.rn.f32 %f8, %f596, %f201, %f185; + fma.rn.f32 %f9, %f597, %f202, %f184; + setp.gt.f32 %p7, %f190, %f194; + neg.f32 %f203, %f596; + selp.f32 %f204, %f203, 0f00000000, %p7; + neg.f32 %f205, %f597; + selp.f32 %f206, %f595, %f205, %p7; + selp.f32 %f207, 0f00000000, %f596, %p7; + mul.f32 %f208, %f206, %f206; + fma.rn.f32 %f209, %f204, %f204, %f208; + fma.rn.f32 %f210, %f207, %f207, %f209; + sqrt.rn.f32 %f211, %f210; + rcp.rn.f32 %f212, %f211; + mul.f32 %f10, %f204, %f212; + mul.f32 %f11, %f206, %f212; + mul.f32 %f12, %f207, %f212; + mul.f32 %f213, %f597, %f11; + mul.f32 %f214, %f596, %f12; + sub.f32 %f13, %f213, %f214; + mul.f32 %f215, %f595, %f12; + mul.f32 %f216, %f597, %f10; + sub.f32 %f14, %f215, %f216; + mul.f32 %f217, %f596, %f10; + mul.f32 %f218, %f595, %f11; + sub.f32 %f15, %f217, %f218; + ld.global.v2.u32 {%r308, %r309}, [pixelID]; + cvt.u64.u32 %rd66, %r308; + cvt.u64.u32 %rd67, %r309; + mov.u64 %rd71, rnd_seeds; + cvta.global.u64 %rd65, %rd71; + // inline asm + call (%rd64), _rt_buffer_get_64, (%rd65, %r293, %r294, %rd66, %rd67, %rd56, %rd56); + // inline asm + add.u64 %rd72, %SP, 0; + cvta.to.local.u64 %rd2, %rd72; + mul.f32 %f16, %f7, 0f3456BF95; + mul.f32 %f17, %f8, 0f3456BF95; + mul.f32 %f18, %f9, 0f3456BF95; + ld.u32 %r312, [%rd64]; + mad.lo.s32 %r313, %r312, 1664525, 1013904223; + and.b32 %r314, %r313, 16777215; + cvt.rn.f32.u32 %f219, %r314; + fma.rn.f32 %f220, %f219, 0f33800000, 0f00000000; + mul.f32 %f221, %f220, 0f3F000000; + mad.lo.s32 %r2, %r313, 1664525, 1013904223; + and.b32 %r315, %r2, 16777215; + cvt.rn.f32.u32 %f222, %r315; + fma.rn.f32 %f223, %f222, 0f33800000, 0f00000000; + mul.f32 %f224, %f223, 0f3F000000; + sqrt.rn.f32 %f19, %f221; + mul.f32 %f604, %f224, 0f40C90FDB; + abs.f32 %f21, %f604; + setp.neu.f32 %p8, %f21, 0f7F800000; + mov.f32 %f598, %f604; + @%p8 bra BB0_5; + + mov.f32 %f225, 0f00000000; + mul.rn.f32 %f598, %f604, %f225; + +BB0_5: + mul.f32 %f226, %f598, 0f3F22F983; + cvt.rni.s32.f32 %r731, %f226; + cvt.rn.f32.s32 %f227, %r731; + neg.f32 %f228, %f227; + mov.f32 %f229, 0f3FC90FDA; + fma.rn.f32 %f230, %f228, %f229, %f598; + mov.f32 %f231, 0f33A22168; + fma.rn.f32 %f232, %f228, %f231, %f230; + mov.f32 %f233, 0f27C234C5; + fma.rn.f32 %f599, %f228, %f233, %f232; + abs.f32 %f234, %f598; + setp.leu.f32 %p9, %f234, 0f47CE4780; + @%p9 bra BB0_16; + + mov.b32 %r4, %f598; + shr.u32 %r5, %r4, 23; + shl.b32 %r318, %r4, 8; + or.b32 %r6, %r318, -2147483648; + mov.u32 %r723, 0; + mov.u64 %rd166, __cudart_i2opi_f; + mov.u32 %r722, -6; + mov.u64 %rd167, %rd2; + +BB0_7: + .pragma "nounroll"; + ld.const.u32 %r321, [%rd166]; + // inline asm + { + mad.lo.cc.u32 %r319, %r321, %r6, %r723; + madc.hi.u32 %r723, %r321, %r6, 0; + } + // inline asm + st.local.u32 [%rd167], %r319; + add.s64 %rd167, %rd167, 4; + add.s64 %rd166, %rd166, 4; + add.s32 %r722, %r722, 1; + setp.ne.s32 %p10, %r722, 0; + @%p10 bra BB0_7; + + and.b32 %r324, %r5, 255; + add.s32 %r325, %r324, -128; + shr.u32 %r326, %r325, 5; + and.b32 %r11, %r4, -2147483648; + st.local.u32 [%rd2+24], %r723; + mov.u32 %r327, 6; + sub.s32 %r328, %r327, %r326; + mul.wide.s32 %rd74, %r328, 4; + add.s64 %rd7, %rd2, %rd74; + ld.local.u32 %r724, [%rd7]; + ld.local.u32 %r725, [%rd7+-4]; + and.b32 %r14, %r5, 31; + setp.eq.s32 %p11, %r14, 0; + @%p11 bra BB0_10; + + mov.u32 %r329, 32; + sub.s32 %r330, %r329, %r14; + shr.u32 %r331, %r725, %r330; + shl.b32 %r332, %r724, %r14; + add.s32 %r724, %r331, %r332; + ld.local.u32 %r333, [%rd7+-8]; + shr.u32 %r334, %r333, %r330; + shl.b32 %r335, %r725, %r14; + add.s32 %r725, %r334, %r335; + +BB0_10: + shr.u32 %r336, %r725, 30; + shl.b32 %r337, %r724, 2; + add.s32 %r726, %r336, %r337; + shl.b32 %r20, %r725, 2; + shr.u32 %r338, %r726, 31; + shr.u32 %r339, %r724, 30; + add.s32 %r21, %r338, %r339; + setp.eq.s32 %p12, %r338, 0; + @%p12 bra BB0_11; + + not.b32 %r340, %r726; + neg.s32 %r728, %r20; + setp.eq.s32 %p13, %r20, 0; + selp.u32 %r341, 1, 0, %p13; + add.s32 %r726, %r341, %r340; + xor.b32 %r727, %r11, -2147483648; + bra.uni BB0_13; + +BB0_11: + mov.u32 %r727, %r11; + mov.u32 %r728, %r20; + +BB0_13: + clz.b32 %r730, %r726; + setp.eq.s32 %p14, %r730, 0; + shl.b32 %r342, %r726, %r730; + mov.u32 %r343, 32; + sub.s32 %r344, %r343, %r730; + shr.u32 %r345, %r728, %r344; + add.s32 %r346, %r345, %r342; + selp.b32 %r29, %r726, %r346, %p14; + mov.u32 %r347, -921707870; + mul.hi.u32 %r729, %r29, %r347; + setp.eq.s32 %p15, %r11, 0; + neg.s32 %r348, %r21; + selp.b32 %r731, %r21, %r348, %p15; + setp.lt.s32 %p16, %r729, 1; + @%p16 bra BB0_15; + + mul.lo.s32 %r349, %r29, -921707870; + shr.u32 %r350, %r349, 31; + shl.b32 %r351, %r729, 1; + add.s32 %r729, %r350, %r351; + add.s32 %r730, %r730, 1; + +BB0_15: + mov.u32 %r352, 126; + sub.s32 %r353, %r352, %r730; + shl.b32 %r354, %r353, 23; + add.s32 %r355, %r729, 1; + shr.u32 %r356, %r355, 7; + add.s32 %r357, %r356, 1; + shr.u32 %r358, %r357, 1; + add.s32 %r359, %r358, %r354; + or.b32 %r360, %r359, %r727; + mov.b32 %f599, %r360; + +BB0_16: + mul.rn.f32 %f27, %f599, %f599; + add.s32 %r37, %r731, 1; + and.b32 %r38, %r37, 1; + setp.eq.s32 %p17, %r38, 0; + @%p17 bra BB0_18; + + mov.f32 %f235, 0fBAB6061A; + mov.f32 %f236, 0f37CCF5CE; + fma.rn.f32 %f600, %f236, %f27, %f235; + bra.uni BB0_19; + +BB0_18: + mov.f32 %f237, 0f3C08839E; + mov.f32 %f238, 0fB94CA1F9; + fma.rn.f32 %f600, %f238, %f27, %f237; + +BB0_19: + @%p17 bra BB0_21; + + mov.f32 %f239, 0f3D2AAAA5; + fma.rn.f32 %f240, %f600, %f27, %f239; + mov.f32 %f241, 0fBF000000; + fma.rn.f32 %f601, %f240, %f27, %f241; + bra.uni BB0_22; + +BB0_21: + mov.f32 %f242, 0fBE2AAAA3; + fma.rn.f32 %f243, %f600, %f27, %f242; + mov.f32 %f244, 0f00000000; + fma.rn.f32 %f601, %f243, %f27, %f244; + +BB0_22: + fma.rn.f32 %f602, %f601, %f599, %f599; + @%p17 bra BB0_24; + + mov.f32 %f245, 0f3F800000; + fma.rn.f32 %f602, %f601, %f27, %f245; + +BB0_24: + and.b32 %r361, %r37, 2; + setp.eq.s32 %p20, %r361, 0; + @%p20 bra BB0_26; + + mov.f32 %f246, 0f00000000; + mov.f32 %f247, 0fBF800000; + fma.rn.f32 %f602, %f602, %f247, %f246; + +BB0_26: + @%p8 bra BB0_28; + + mov.f32 %f248, 0f00000000; + mul.rn.f32 %f604, %f604, %f248; + +BB0_28: + mul.f32 %f249, %f604, 0f3F22F983; + cvt.rni.s32.f32 %r741, %f249; + cvt.rn.f32.s32 %f250, %r741; + neg.f32 %f251, %f250; + fma.rn.f32 %f253, %f251, %f229, %f604; + fma.rn.f32 %f255, %f251, %f231, %f253; + fma.rn.f32 %f605, %f251, %f233, %f255; + abs.f32 %f257, %f604; + setp.leu.f32 %p22, %f257, 0f47CE4780; + @%p22 bra BB0_39; + + mov.b32 %r40, %f604; + shr.u32 %r41, %r40, 23; + shl.b32 %r364, %r40, 8; + or.b32 %r42, %r364, -2147483648; + cvta.to.local.u64 %rd169, %rd72; + mov.u32 %r733, 0; + mov.u64 %rd168, __cudart_i2opi_f; + mov.u32 %r732, -6; + +BB0_30: + .pragma "nounroll"; + ld.const.u32 %r367, [%rd168]; + // inline asm + { + mad.lo.cc.u32 %r365, %r367, %r42, %r733; + madc.hi.u32 %r733, %r367, %r42, 0; + } + // inline asm + st.local.u32 [%rd169], %r365; + add.s64 %rd169, %rd169, 4; + add.s64 %rd168, %rd168, 4; + add.s32 %r732, %r732, 1; + setp.ne.s32 %p23, %r732, 0; + @%p23 bra BB0_30; + + and.b32 %r370, %r41, 255; + add.s32 %r371, %r370, -128; + shr.u32 %r372, %r371, 5; + and.b32 %r47, %r40, -2147483648; + cvta.to.local.u64 %rd78, %rd72; + st.local.u32 [%rd78+24], %r733; + mov.u32 %r373, 6; + sub.s32 %r374, %r373, %r372; + mul.wide.s32 %rd79, %r374, 4; + add.s64 %rd13, %rd78, %rd79; + ld.local.u32 %r734, [%rd13]; + ld.local.u32 %r735, [%rd13+-4]; + and.b32 %r50, %r41, 31; + setp.eq.s32 %p24, %r50, 0; + @%p24 bra BB0_33; + + mov.u32 %r375, 32; + sub.s32 %r376, %r375, %r50; + shr.u32 %r377, %r735, %r376; + shl.b32 %r378, %r734, %r50; + add.s32 %r734, %r377, %r378; + ld.local.u32 %r379, [%rd13+-8]; + shr.u32 %r380, %r379, %r376; + shl.b32 %r381, %r735, %r50; + add.s32 %r735, %r380, %r381; + +BB0_33: + shr.u32 %r382, %r735, 30; + shl.b32 %r383, %r734, 2; + add.s32 %r736, %r382, %r383; + shl.b32 %r56, %r735, 2; + shr.u32 %r384, %r736, 31; + shr.u32 %r385, %r734, 30; + add.s32 %r57, %r384, %r385; + setp.eq.s32 %p25, %r384, 0; + @%p25 bra BB0_34; + + not.b32 %r386, %r736; + neg.s32 %r738, %r56; + setp.eq.s32 %p26, %r56, 0; + selp.u32 %r387, 1, 0, %p26; + add.s32 %r736, %r387, %r386; + xor.b32 %r737, %r47, -2147483648; + bra.uni BB0_36; + +BB0_34: + mov.u32 %r737, %r47; + mov.u32 %r738, %r56; + +BB0_36: + clz.b32 %r740, %r736; + setp.eq.s32 %p27, %r740, 0; + shl.b32 %r388, %r736, %r740; + mov.u32 %r389, 32; + sub.s32 %r390, %r389, %r740; + shr.u32 %r391, %r738, %r390; + add.s32 %r392, %r391, %r388; + selp.b32 %r65, %r736, %r392, %p27; + mov.u32 %r393, -921707870; + mul.hi.u32 %r739, %r65, %r393; + setp.eq.s32 %p28, %r47, 0; + neg.s32 %r394, %r57; + selp.b32 %r741, %r57, %r394, %p28; + setp.lt.s32 %p29, %r739, 1; + @%p29 bra BB0_38; + + mul.lo.s32 %r395, %r65, -921707870; + shr.u32 %r396, %r395, 31; + shl.b32 %r397, %r739, 1; + add.s32 %r739, %r396, %r397; + add.s32 %r740, %r740, 1; + +BB0_38: + mov.u32 %r398, 126; + sub.s32 %r399, %r398, %r740; + shl.b32 %r400, %r399, 23; + add.s32 %r401, %r739, 1; + shr.u32 %r402, %r401, 7; + add.s32 %r403, %r402, 1; + shr.u32 %r404, %r403, 1; + add.s32 %r405, %r404, %r400; + or.b32 %r406, %r405, %r737; + mov.b32 %f605, %r406; + +BB0_39: + mul.rn.f32 %f44, %f605, %f605; + and.b32 %r73, %r741, 1; + setp.eq.s32 %p30, %r73, 0; + @%p30 bra BB0_41; + + mov.f32 %f258, 0fBAB6061A; + mov.f32 %f259, 0f37CCF5CE; + fma.rn.f32 %f606, %f259, %f44, %f258; + bra.uni BB0_42; + +BB0_41: + mov.f32 %f260, 0f3C08839E; + mov.f32 %f261, 0fB94CA1F9; + fma.rn.f32 %f606, %f261, %f44, %f260; + +BB0_42: + @%p30 bra BB0_44; + + mov.f32 %f262, 0f3D2AAAA5; + fma.rn.f32 %f263, %f606, %f44, %f262; + mov.f32 %f264, 0fBF000000; + fma.rn.f32 %f607, %f263, %f44, %f264; + bra.uni BB0_45; + +BB0_44: + mov.f32 %f265, 0fBE2AAAA3; + fma.rn.f32 %f266, %f606, %f44, %f265; + mov.f32 %f267, 0f00000000; + fma.rn.f32 %f607, %f266, %f44, %f267; + +BB0_45: + fma.rn.f32 %f608, %f607, %f605, %f605; + @%p30 bra BB0_47; + + mov.f32 %f268, 0f3F800000; + fma.rn.f32 %f608, %f607, %f44, %f268; + +BB0_47: + and.b32 %r407, %r741, 2; + setp.eq.s32 %p33, %r407, 0; + @%p33 bra BB0_49; + + mov.f32 %f269, 0f00000000; + mov.f32 %f270, 0fBF800000; + fma.rn.f32 %f608, %f608, %f270, %f269; + +BB0_49: + mul.f32 %f279, %f19, %f602; + add.u64 %rd80, %SP, 28; + cvta.to.local.u64 %rd14, %rd80; + mul.f32 %f280, %f279, %f279; + mov.f32 %f281, 0f3F800000; + sub.f32 %f282, %f281, %f280; + mul.f32 %f283, %f19, %f608; + mul.f32 %f284, %f283, %f283; + sub.f32 %f285, %f282, %f284; + mov.f32 %f286, 0f00000000; + max.f32 %f287, %f286, %f285; + sqrt.rn.f32 %f288, %f287; + mul.f32 %f289, %f10, %f283; + mul.f32 %f290, %f11, %f283; + mul.f32 %f291, %f12, %f283; + fma.rn.f32 %f292, %f13, %f279, %f289; + fma.rn.f32 %f293, %f14, %f279, %f290; + fma.rn.f32 %f294, %f15, %f279, %f291; + fma.rn.f32 %f274, %f595, %f288, %f292; + fma.rn.f32 %f275, %f596, %f288, %f293; + fma.rn.f32 %f276, %f597, %f288, %f294; + abs.f32 %f295, %f274; + abs.f32 %f296, %f275; + abs.f32 %f297, %f276; + div.rn.f32 %f298, %f16, %f295; + div.rn.f32 %f299, %f17, %f296; + div.rn.f32 %f300, %f18, %f297; + abs.f32 %f301, %f298; + abs.f32 %f302, %f299; + abs.f32 %f303, %f300; + max.f32 %f304, %f301, %f302; + max.f32 %f305, %f304, %f303; + max.f32 %f277, %f305, %f199; + mov.u32 %r409, 0; + st.local.u32 [%rd14+8], %r409; + st.local.u32 [%rd14+4], %r409; + st.local.u32 [%rd14], %r409; + ld.global.u32 %r408, [root]; + mov.f32 %f278, 0f6C4ECB8F; + // inline asm + call _rt_trace_64, (%r408, %f7, %f8, %f9, %f274, %f275, %f276, %r409, %f277, %f278, %rd80, %r301); + // inline asm + ld.local.f32 %f307, [%rd14+4]; + setp.leu.f32 %p34, %f307, 0f00000000; + @%p34 bra BB0_52; + + ld.local.f32 %f308, [%rd14]; + div.rn.f32 %f309, %f308, 0f41200000; + cvt.rzi.s32.f32 %r74, %f309; + setp.lt.s32 %p35, %r74, 0; + @%p35 bra BB0_52; + + cvt.s64.s32 %rd89, %r74; + mov.u64 %rd93, lmidLODs; + cvta.global.u64 %rd82, %rd93; + mov.u32 %r413, 1; + // inline asm + call (%rd81), _rt_buffer_get_64, (%rd82, %r413, %r294, %rd89, %rd56, %rd56, %rd56); + // inline asm + ld.local.f32 %f310, [%rd14+4]; + ld.f32 %f311, [%rd81]; + max.f32 %f312, %f311, %f310; + // inline asm + call (%rd87), _rt_buffer_get_64, (%rd82, %r413, %r294, %rd89, %rd56, %rd56, %rd56); + // inline asm + st.f32 [%rd87], %f312; + +BB0_52: + mad.lo.s32 %r415, %r2, 1664525, 1013904223; + and.b32 %r416, %r415, 16777215; + cvt.rn.f32.u32 %f313, %r416; + fma.rn.f32 %f314, %f313, 0f33800000, 0f00000000; + mul.f32 %f315, %f314, 0f3F000000; + mad.lo.s32 %r75, %r415, 1664525, 1013904223; + and.b32 %r417, %r75, 16777215; + cvt.rn.f32.u32 %f316, %r417; + fma.rn.f32 %f317, %f316, 0f33800000, 0f3F800000; + mul.f32 %f318, %f317, 0f3F000000; + sqrt.rn.f32 %f56, %f315; + mul.f32 %f616, %f318, 0f40C90FDB; + abs.f32 %f58, %f616; + setp.neu.f32 %p36, %f58, 0f7F800000; + mov.f32 %f610, %f616; + @%p36 bra BB0_54; + + mul.rn.f32 %f610, %f616, %f286; + +BB0_54: + mul.f32 %f320, %f610, 0f3F22F983; + cvt.rni.s32.f32 %r751, %f320; + cvt.rn.f32.s32 %f321, %r751; + neg.f32 %f322, %f321; + fma.rn.f32 %f324, %f322, %f229, %f610; + fma.rn.f32 %f326, %f322, %f231, %f324; + fma.rn.f32 %f611, %f322, %f233, %f326; + abs.f32 %f328, %f610; + setp.leu.f32 %p37, %f328, 0f47CE4780; + @%p37 bra BB0_65; + + mov.b32 %r77, %f610; + shr.u32 %r78, %r77, 23; + shl.b32 %r420, %r77, 8; + or.b32 %r79, %r420, -2147483648; + cvta.to.local.u64 %rd171, %rd72; + mov.u32 %r743, 0; + mov.u64 %rd170, __cudart_i2opi_f; + mov.u32 %r742, -6; + +BB0_56: + .pragma "nounroll"; + ld.const.u32 %r423, [%rd170]; + // inline asm + { + mad.lo.cc.u32 %r421, %r423, %r79, %r743; + madc.hi.u32 %r743, %r423, %r79, 0; + } + // inline asm + st.local.u32 [%rd171], %r421; + add.s64 %rd171, %rd171, 4; + add.s64 %rd170, %rd170, 4; + add.s32 %r742, %r742, 1; + setp.ne.s32 %p38, %r742, 0; + @%p38 bra BB0_56; + + and.b32 %r426, %r78, 255; + add.s32 %r427, %r426, -128; + shr.u32 %r428, %r427, 5; + and.b32 %r84, %r77, -2147483648; + cvta.to.local.u64 %rd97, %rd72; + st.local.u32 [%rd97+24], %r743; + mov.u32 %r429, 6; + sub.s32 %r430, %r429, %r428; + mul.wide.s32 %rd98, %r430, 4; + add.s64 %rd20, %rd97, %rd98; + ld.local.u32 %r744, [%rd20]; + ld.local.u32 %r745, [%rd20+-4]; + and.b32 %r87, %r78, 31; + setp.eq.s32 %p39, %r87, 0; + @%p39 bra BB0_59; + + mov.u32 %r431, 32; + sub.s32 %r432, %r431, %r87; + shr.u32 %r433, %r745, %r432; + shl.b32 %r434, %r744, %r87; + add.s32 %r744, %r433, %r434; + ld.local.u32 %r435, [%rd20+-8]; + shr.u32 %r436, %r435, %r432; + shl.b32 %r437, %r745, %r87; + add.s32 %r745, %r436, %r437; + +BB0_59: + shr.u32 %r438, %r745, 30; + shl.b32 %r439, %r744, 2; + add.s32 %r746, %r438, %r439; + shl.b32 %r93, %r745, 2; + shr.u32 %r440, %r746, 31; + shr.u32 %r441, %r744, 30; + add.s32 %r94, %r440, %r441; + setp.eq.s32 %p40, %r440, 0; + @%p40 bra BB0_60; + + not.b32 %r442, %r746; + neg.s32 %r748, %r93; + setp.eq.s32 %p41, %r93, 0; + selp.u32 %r443, 1, 0, %p41; + add.s32 %r746, %r443, %r442; + xor.b32 %r747, %r84, -2147483648; + bra.uni BB0_62; + +BB0_60: + mov.u32 %r747, %r84; + mov.u32 %r748, %r93; + +BB0_62: + clz.b32 %r750, %r746; + setp.eq.s32 %p42, %r750, 0; + shl.b32 %r444, %r746, %r750; + mov.u32 %r445, 32; + sub.s32 %r446, %r445, %r750; + shr.u32 %r447, %r748, %r446; + add.s32 %r448, %r447, %r444; + selp.b32 %r102, %r746, %r448, %p42; + mov.u32 %r449, -921707870; + mul.hi.u32 %r749, %r102, %r449; + setp.eq.s32 %p43, %r84, 0; + neg.s32 %r450, %r94; + selp.b32 %r751, %r94, %r450, %p43; + setp.lt.s32 %p44, %r749, 1; + @%p44 bra BB0_64; + + mul.lo.s32 %r451, %r102, -921707870; + shr.u32 %r452, %r451, 31; + shl.b32 %r453, %r749, 1; + add.s32 %r749, %r452, %r453; + add.s32 %r750, %r750, 1; + +BB0_64: + mov.u32 %r454, 126; + sub.s32 %r455, %r454, %r750; + shl.b32 %r456, %r455, 23; + add.s32 %r457, %r749, 1; + shr.u32 %r458, %r457, 7; + add.s32 %r459, %r458, 1; + shr.u32 %r460, %r459, 1; + add.s32 %r461, %r460, %r456; + or.b32 %r462, %r461, %r747; + mov.b32 %f611, %r462; + +BB0_65: + mul.rn.f32 %f64, %f611, %f611; + add.s32 %r110, %r751, 1; + and.b32 %r111, %r110, 1; + setp.eq.s32 %p45, %r111, 0; + @%p45 bra BB0_67; + + mov.f32 %f329, 0fBAB6061A; + mov.f32 %f330, 0f37CCF5CE; + fma.rn.f32 %f612, %f330, %f64, %f329; + bra.uni BB0_68; + +BB0_67: + mov.f32 %f331, 0f3C08839E; + mov.f32 %f332, 0fB94CA1F9; + fma.rn.f32 %f612, %f332, %f64, %f331; + +BB0_68: + @%p45 bra BB0_70; + + mov.f32 %f333, 0f3D2AAAA5; + fma.rn.f32 %f334, %f612, %f64, %f333; + mov.f32 %f335, 0fBF000000; + fma.rn.f32 %f613, %f334, %f64, %f335; + bra.uni BB0_71; + +BB0_70: + mov.f32 %f336, 0fBE2AAAA3; + fma.rn.f32 %f337, %f612, %f64, %f336; + fma.rn.f32 %f613, %f337, %f64, %f286; + +BB0_71: + fma.rn.f32 %f614, %f613, %f611, %f611; + @%p45 bra BB0_73; + + fma.rn.f32 %f614, %f613, %f64, %f281; + +BB0_73: + and.b32 %r463, %r110, 2; + setp.eq.s32 %p48, %r463, 0; + @%p48 bra BB0_75; + + mov.f32 %f341, 0fBF800000; + fma.rn.f32 %f614, %f614, %f341, %f286; + +BB0_75: + @%p36 bra BB0_77; + + mul.rn.f32 %f616, %f616, %f286; + +BB0_77: + mul.f32 %f343, %f616, 0f3F22F983; + cvt.rni.s32.f32 %r761, %f343; + cvt.rn.f32.s32 %f344, %r761; + neg.f32 %f345, %f344; + fma.rn.f32 %f347, %f345, %f229, %f616; + fma.rn.f32 %f349, %f345, %f231, %f347; + fma.rn.f32 %f617, %f345, %f233, %f349; + abs.f32 %f351, %f616; + setp.leu.f32 %p50, %f351, 0f47CE4780; + @%p50 bra BB0_88; + + mov.b32 %r113, %f616; + shr.u32 %r114, %r113, 23; + shl.b32 %r466, %r113, 8; + or.b32 %r115, %r466, -2147483648; + cvta.to.local.u64 %rd173, %rd72; + mov.u32 %r753, 0; + mov.u64 %rd172, __cudart_i2opi_f; + mov.u32 %r752, -6; + +BB0_79: + .pragma "nounroll"; + ld.const.u32 %r469, [%rd172]; + // inline asm + { + mad.lo.cc.u32 %r467, %r469, %r115, %r753; + madc.hi.u32 %r753, %r469, %r115, 0; + } + // inline asm + st.local.u32 [%rd173], %r467; + add.s64 %rd173, %rd173, 4; + add.s64 %rd172, %rd172, 4; + add.s32 %r752, %r752, 1; + setp.ne.s32 %p51, %r752, 0; + @%p51 bra BB0_79; + + and.b32 %r472, %r114, 255; + add.s32 %r473, %r472, -128; + shr.u32 %r474, %r473, 5; + and.b32 %r120, %r113, -2147483648; + cvta.to.local.u64 %rd102, %rd72; + st.local.u32 [%rd102+24], %r753; + mov.u32 %r475, 6; + sub.s32 %r476, %r475, %r474; + mul.wide.s32 %rd103, %r476, 4; + add.s64 %rd26, %rd102, %rd103; + ld.local.u32 %r754, [%rd26]; + ld.local.u32 %r755, [%rd26+-4]; + and.b32 %r123, %r114, 31; + setp.eq.s32 %p52, %r123, 0; + @%p52 bra BB0_82; + + mov.u32 %r477, 32; + sub.s32 %r478, %r477, %r123; + shr.u32 %r479, %r755, %r478; + shl.b32 %r480, %r754, %r123; + add.s32 %r754, %r479, %r480; + ld.local.u32 %r481, [%rd26+-8]; + shr.u32 %r482, %r481, %r478; + shl.b32 %r483, %r755, %r123; + add.s32 %r755, %r482, %r483; + +BB0_82: + shr.u32 %r484, %r755, 30; + shl.b32 %r485, %r754, 2; + add.s32 %r756, %r484, %r485; + shl.b32 %r129, %r755, 2; + shr.u32 %r486, %r756, 31; + shr.u32 %r487, %r754, 30; + add.s32 %r130, %r486, %r487; + setp.eq.s32 %p53, %r486, 0; + @%p53 bra BB0_83; + + not.b32 %r488, %r756; + neg.s32 %r758, %r129; + setp.eq.s32 %p54, %r129, 0; + selp.u32 %r489, 1, 0, %p54; + add.s32 %r756, %r489, %r488; + xor.b32 %r757, %r120, -2147483648; + bra.uni BB0_85; + +BB0_83: + mov.u32 %r757, %r120; + mov.u32 %r758, %r129; + +BB0_85: + clz.b32 %r760, %r756; + setp.eq.s32 %p55, %r760, 0; + shl.b32 %r490, %r756, %r760; + mov.u32 %r491, 32; + sub.s32 %r492, %r491, %r760; + shr.u32 %r493, %r758, %r492; + add.s32 %r494, %r493, %r490; + selp.b32 %r138, %r756, %r494, %p55; + mov.u32 %r495, -921707870; + mul.hi.u32 %r759, %r138, %r495; + setp.eq.s32 %p56, %r120, 0; + neg.s32 %r496, %r130; + selp.b32 %r761, %r130, %r496, %p56; + setp.lt.s32 %p57, %r759, 1; + @%p57 bra BB0_87; + + mul.lo.s32 %r497, %r138, -921707870; + shr.u32 %r498, %r497, 31; + shl.b32 %r499, %r759, 1; + add.s32 %r759, %r498, %r499; + add.s32 %r760, %r760, 1; + +BB0_87: + mov.u32 %r500, 126; + sub.s32 %r501, %r500, %r760; + shl.b32 %r502, %r501, 23; + add.s32 %r503, %r759, 1; + shr.u32 %r504, %r503, 7; + add.s32 %r505, %r504, 1; + shr.u32 %r506, %r505, 1; + add.s32 %r507, %r506, %r502; + or.b32 %r508, %r507, %r757; + mov.b32 %f617, %r508; + +BB0_88: + mul.rn.f32 %f81, %f617, %f617; + and.b32 %r146, %r761, 1; + setp.eq.s32 %p58, %r146, 0; + @%p58 bra BB0_90; + + mov.f32 %f352, 0fBAB6061A; + mov.f32 %f353, 0f37CCF5CE; + fma.rn.f32 %f618, %f353, %f81, %f352; + bra.uni BB0_91; + +BB0_90: + mov.f32 %f354, 0f3C08839E; + mov.f32 %f355, 0fB94CA1F9; + fma.rn.f32 %f618, %f355, %f81, %f354; + +BB0_91: + @%p58 bra BB0_93; + + mov.f32 %f356, 0f3D2AAAA5; + fma.rn.f32 %f357, %f618, %f81, %f356; + mov.f32 %f358, 0fBF000000; + fma.rn.f32 %f619, %f357, %f81, %f358; + bra.uni BB0_94; + +BB0_93: + mov.f32 %f359, 0fBE2AAAA3; + fma.rn.f32 %f360, %f618, %f81, %f359; + fma.rn.f32 %f619, %f360, %f81, %f286; + +BB0_94: + fma.rn.f32 %f620, %f619, %f617, %f617; + @%p58 bra BB0_96; + + fma.rn.f32 %f620, %f619, %f81, %f281; + +BB0_96: + and.b32 %r509, %r761, 2; + setp.eq.s32 %p61, %r509, 0; + @%p61 bra BB0_98; + + mov.f32 %f364, 0fBF800000; + fma.rn.f32 %f620, %f620, %f364, %f286; + +BB0_98: + mul.f32 %f373, %f56, %f614; + mul.f32 %f374, %f373, %f373; + sub.f32 %f376, %f281, %f374; + mul.f32 %f377, %f56, %f620; + mul.f32 %f378, %f377, %f377; + sub.f32 %f379, %f376, %f378; + max.f32 %f381, %f286, %f379; + sqrt.rn.f32 %f382, %f381; + mul.f32 %f383, %f10, %f377; + mul.f32 %f384, %f11, %f377; + mul.f32 %f385, %f12, %f377; + fma.rn.f32 %f386, %f13, %f373, %f383; + fma.rn.f32 %f387, %f14, %f373, %f384; + fma.rn.f32 %f388, %f15, %f373, %f385; + fma.rn.f32 %f368, %f595, %f382, %f386; + fma.rn.f32 %f369, %f596, %f382, %f387; + fma.rn.f32 %f370, %f597, %f382, %f388; + abs.f32 %f389, %f368; + abs.f32 %f390, %f369; + abs.f32 %f391, %f370; + div.rn.f32 %f392, %f16, %f389; + div.rn.f32 %f393, %f17, %f390; + div.rn.f32 %f394, %f18, %f391; + abs.f32 %f395, %f392; + abs.f32 %f396, %f393; + abs.f32 %f397, %f394; + max.f32 %f398, %f395, %f396; + max.f32 %f399, %f398, %f397; + max.f32 %f371, %f399, %f199; + st.local.u32 [%rd14+8], %r409; + st.local.u32 [%rd14+4], %r409; + st.local.u32 [%rd14], %r409; + ld.global.u32 %r510, [root]; + // inline asm + call _rt_trace_64, (%r510, %f7, %f8, %f9, %f368, %f369, %f370, %r409, %f371, %f278, %rd80, %r301); + // inline asm + ld.local.f32 %f401, [%rd14+4]; + setp.leu.f32 %p62, %f401, 0f00000000; + @%p62 bra BB0_101; + + ld.local.f32 %f402, [%rd14]; + div.rn.f32 %f403, %f402, 0f41200000; + cvt.rzi.s32.f32 %r147, %f403; + setp.lt.s32 %p63, %r147, 0; + @%p63 bra BB0_101; + + cvt.s64.s32 %rd113, %r147; + mov.u64 %rd117, lmidLODs; + cvta.global.u64 %rd106, %rd117; + mov.u32 %r515, 1; + // inline asm + call (%rd105), _rt_buffer_get_64, (%rd106, %r515, %r294, %rd113, %rd56, %rd56, %rd56); + // inline asm + ld.local.f32 %f404, [%rd14+4]; + ld.f32 %f405, [%rd105]; + max.f32 %f406, %f405, %f404; + // inline asm + call (%rd111), _rt_buffer_get_64, (%rd106, %r515, %r294, %rd113, %rd56, %rd56, %rd56); + // inline asm + st.f32 [%rd111], %f406; + +BB0_101: + mad.lo.s32 %r517, %r75, 1664525, 1013904223; + and.b32 %r518, %r517, 16777215; + cvt.rn.f32.u32 %f407, %r518; + fma.rn.f32 %f408, %f407, 0f33800000, 0f3F800000; + mul.f32 %f409, %f408, 0f3F000000; + mad.lo.s32 %r148, %r517, 1664525, 1013904223; + and.b32 %r519, %r148, 16777215; + cvt.rn.f32.u32 %f410, %r519; + fma.rn.f32 %f411, %f410, 0f33800000, 0f00000000; + mul.f32 %f412, %f411, 0f3F000000; + sqrt.rn.f32 %f93, %f409; + mul.f32 %f628, %f412, 0f40C90FDB; + abs.f32 %f95, %f628; + setp.neu.f32 %p64, %f95, 0f7F800000; + mov.f32 %f622, %f628; + @%p64 bra BB0_103; + + mul.rn.f32 %f622, %f628, %f286; + +BB0_103: + mul.f32 %f414, %f622, 0f3F22F983; + cvt.rni.s32.f32 %r771, %f414; + cvt.rn.f32.s32 %f415, %r771; + neg.f32 %f416, %f415; + fma.rn.f32 %f418, %f416, %f229, %f622; + fma.rn.f32 %f420, %f416, %f231, %f418; + fma.rn.f32 %f623, %f416, %f233, %f420; + abs.f32 %f422, %f622; + setp.leu.f32 %p65, %f422, 0f47CE4780; + @%p65 bra BB0_114; + + mov.b32 %r150, %f622; + shr.u32 %r151, %r150, 23; + shl.b32 %r522, %r150, 8; + or.b32 %r152, %r522, -2147483648; + cvta.to.local.u64 %rd175, %rd72; + mov.u32 %r763, 0; + mov.u64 %rd174, __cudart_i2opi_f; + mov.u32 %r762, -6; + +BB0_105: + .pragma "nounroll"; + ld.const.u32 %r525, [%rd174]; + // inline asm + { + mad.lo.cc.u32 %r523, %r525, %r152, %r763; + madc.hi.u32 %r763, %r525, %r152, 0; + } + // inline asm + st.local.u32 [%rd175], %r523; + add.s64 %rd175, %rd175, 4; + add.s64 %rd174, %rd174, 4; + add.s32 %r762, %r762, 1; + setp.ne.s32 %p66, %r762, 0; + @%p66 bra BB0_105; + + and.b32 %r528, %r151, 255; + add.s32 %r529, %r528, -128; + shr.u32 %r530, %r529, 5; + and.b32 %r157, %r150, -2147483648; + cvta.to.local.u64 %rd121, %rd72; + st.local.u32 [%rd121+24], %r763; + mov.u32 %r531, 6; + sub.s32 %r532, %r531, %r530; + mul.wide.s32 %rd122, %r532, 4; + add.s64 %rd32, %rd121, %rd122; + ld.local.u32 %r764, [%rd32]; + ld.local.u32 %r765, [%rd32+-4]; + and.b32 %r160, %r151, 31; + setp.eq.s32 %p67, %r160, 0; + @%p67 bra BB0_108; + + mov.u32 %r533, 32; + sub.s32 %r534, %r533, %r160; + shr.u32 %r535, %r765, %r534; + shl.b32 %r536, %r764, %r160; + add.s32 %r764, %r535, %r536; + ld.local.u32 %r537, [%rd32+-8]; + shr.u32 %r538, %r537, %r534; + shl.b32 %r539, %r765, %r160; + add.s32 %r765, %r538, %r539; + +BB0_108: + shr.u32 %r540, %r765, 30; + shl.b32 %r541, %r764, 2; + add.s32 %r766, %r540, %r541; + shl.b32 %r166, %r765, 2; + shr.u32 %r542, %r766, 31; + shr.u32 %r543, %r764, 30; + add.s32 %r167, %r542, %r543; + setp.eq.s32 %p68, %r542, 0; + @%p68 bra BB0_109; + + not.b32 %r544, %r766; + neg.s32 %r768, %r166; + setp.eq.s32 %p69, %r166, 0; + selp.u32 %r545, 1, 0, %p69; + add.s32 %r766, %r545, %r544; + xor.b32 %r767, %r157, -2147483648; + bra.uni BB0_111; + +BB0_109: + mov.u32 %r767, %r157; + mov.u32 %r768, %r166; + +BB0_111: + clz.b32 %r770, %r766; + setp.eq.s32 %p70, %r770, 0; + shl.b32 %r546, %r766, %r770; + mov.u32 %r547, 32; + sub.s32 %r548, %r547, %r770; + shr.u32 %r549, %r768, %r548; + add.s32 %r550, %r549, %r546; + selp.b32 %r175, %r766, %r550, %p70; + mov.u32 %r551, -921707870; + mul.hi.u32 %r769, %r175, %r551; + setp.eq.s32 %p71, %r157, 0; + neg.s32 %r552, %r167; + selp.b32 %r771, %r167, %r552, %p71; + setp.lt.s32 %p72, %r769, 1; + @%p72 bra BB0_113; + + mul.lo.s32 %r553, %r175, -921707870; + shr.u32 %r554, %r553, 31; + shl.b32 %r555, %r769, 1; + add.s32 %r769, %r554, %r555; + add.s32 %r770, %r770, 1; + +BB0_113: + mov.u32 %r556, 126; + sub.s32 %r557, %r556, %r770; + shl.b32 %r558, %r557, 23; + add.s32 %r559, %r769, 1; + shr.u32 %r560, %r559, 7; + add.s32 %r561, %r560, 1; + shr.u32 %r562, %r561, 1; + add.s32 %r563, %r562, %r558; + or.b32 %r564, %r563, %r767; + mov.b32 %f623, %r564; + +BB0_114: + mul.rn.f32 %f101, %f623, %f623; + add.s32 %r183, %r771, 1; + and.b32 %r184, %r183, 1; + setp.eq.s32 %p73, %r184, 0; + @%p73 bra BB0_116; + + mov.f32 %f423, 0fBAB6061A; + mov.f32 %f424, 0f37CCF5CE; + fma.rn.f32 %f624, %f424, %f101, %f423; + bra.uni BB0_117; + +BB0_116: + mov.f32 %f425, 0f3C08839E; + mov.f32 %f426, 0fB94CA1F9; + fma.rn.f32 %f624, %f426, %f101, %f425; + +BB0_117: + @%p73 bra BB0_119; + + mov.f32 %f427, 0f3D2AAAA5; + fma.rn.f32 %f428, %f624, %f101, %f427; + mov.f32 %f429, 0fBF000000; + fma.rn.f32 %f625, %f428, %f101, %f429; + bra.uni BB0_120; + +BB0_119: + mov.f32 %f430, 0fBE2AAAA3; + fma.rn.f32 %f431, %f624, %f101, %f430; + fma.rn.f32 %f625, %f431, %f101, %f286; + +BB0_120: + fma.rn.f32 %f626, %f625, %f623, %f623; + @%p73 bra BB0_122; + + fma.rn.f32 %f626, %f625, %f101, %f281; + +BB0_122: + and.b32 %r565, %r183, 2; + setp.eq.s32 %p76, %r565, 0; + @%p76 bra BB0_124; + + mov.f32 %f435, 0fBF800000; + fma.rn.f32 %f626, %f626, %f435, %f286; + +BB0_124: + @%p64 bra BB0_126; + + mul.rn.f32 %f628, %f628, %f286; + +BB0_126: + mul.f32 %f437, %f628, 0f3F22F983; + cvt.rni.s32.f32 %r781, %f437; + cvt.rn.f32.s32 %f438, %r781; + neg.f32 %f439, %f438; + fma.rn.f32 %f441, %f439, %f229, %f628; + fma.rn.f32 %f443, %f439, %f231, %f441; + fma.rn.f32 %f629, %f439, %f233, %f443; + abs.f32 %f445, %f628; + setp.leu.f32 %p78, %f445, 0f47CE4780; + @%p78 bra BB0_137; + + mov.b32 %r186, %f628; + shr.u32 %r187, %r186, 23; + shl.b32 %r568, %r186, 8; + or.b32 %r188, %r568, -2147483648; + cvta.to.local.u64 %rd177, %rd72; + mov.u32 %r773, 0; + mov.u64 %rd176, __cudart_i2opi_f; + mov.u32 %r772, -6; + +BB0_128: + .pragma "nounroll"; + ld.const.u32 %r571, [%rd176]; + // inline asm + { + mad.lo.cc.u32 %r569, %r571, %r188, %r773; + madc.hi.u32 %r773, %r571, %r188, 0; + } + // inline asm + st.local.u32 [%rd177], %r569; + add.s64 %rd177, %rd177, 4; + add.s64 %rd176, %rd176, 4; + add.s32 %r772, %r772, 1; + setp.ne.s32 %p79, %r772, 0; + @%p79 bra BB0_128; + + and.b32 %r574, %r187, 255; + add.s32 %r575, %r574, -128; + shr.u32 %r576, %r575, 5; + and.b32 %r193, %r186, -2147483648; + cvta.to.local.u64 %rd126, %rd72; + st.local.u32 [%rd126+24], %r773; + mov.u32 %r577, 6; + sub.s32 %r578, %r577, %r576; + mul.wide.s32 %rd127, %r578, 4; + add.s64 %rd38, %rd126, %rd127; + ld.local.u32 %r774, [%rd38]; + ld.local.u32 %r775, [%rd38+-4]; + and.b32 %r196, %r187, 31; + setp.eq.s32 %p80, %r196, 0; + @%p80 bra BB0_131; + + mov.u32 %r579, 32; + sub.s32 %r580, %r579, %r196; + shr.u32 %r581, %r775, %r580; + shl.b32 %r582, %r774, %r196; + add.s32 %r774, %r581, %r582; + ld.local.u32 %r583, [%rd38+-8]; + shr.u32 %r584, %r583, %r580; + shl.b32 %r585, %r775, %r196; + add.s32 %r775, %r584, %r585; + +BB0_131: + shr.u32 %r586, %r775, 30; + shl.b32 %r587, %r774, 2; + add.s32 %r776, %r586, %r587; + shl.b32 %r202, %r775, 2; + shr.u32 %r588, %r776, 31; + shr.u32 %r589, %r774, 30; + add.s32 %r203, %r588, %r589; + setp.eq.s32 %p81, %r588, 0; + @%p81 bra BB0_132; + + not.b32 %r590, %r776; + neg.s32 %r778, %r202; + setp.eq.s32 %p82, %r202, 0; + selp.u32 %r591, 1, 0, %p82; + add.s32 %r776, %r591, %r590; + xor.b32 %r777, %r193, -2147483648; + bra.uni BB0_134; + +BB0_132: + mov.u32 %r777, %r193; + mov.u32 %r778, %r202; + +BB0_134: + clz.b32 %r780, %r776; + setp.eq.s32 %p83, %r780, 0; + shl.b32 %r592, %r776, %r780; + mov.u32 %r593, 32; + sub.s32 %r594, %r593, %r780; + shr.u32 %r595, %r778, %r594; + add.s32 %r596, %r595, %r592; + selp.b32 %r211, %r776, %r596, %p83; + mov.u32 %r597, -921707870; + mul.hi.u32 %r779, %r211, %r597; + setp.eq.s32 %p84, %r193, 0; + neg.s32 %r598, %r203; + selp.b32 %r781, %r203, %r598, %p84; + setp.lt.s32 %p85, %r779, 1; + @%p85 bra BB0_136; + + mul.lo.s32 %r599, %r211, -921707870; + shr.u32 %r600, %r599, 31; + shl.b32 %r601, %r779, 1; + add.s32 %r779, %r600, %r601; + add.s32 %r780, %r780, 1; + +BB0_136: + mov.u32 %r602, 126; + sub.s32 %r603, %r602, %r780; + shl.b32 %r604, %r603, 23; + add.s32 %r605, %r779, 1; + shr.u32 %r606, %r605, 7; + add.s32 %r607, %r606, 1; + shr.u32 %r608, %r607, 1; + add.s32 %r609, %r608, %r604; + or.b32 %r610, %r609, %r777; + mov.b32 %f629, %r610; + +BB0_137: + mul.rn.f32 %f118, %f629, %f629; + and.b32 %r219, %r781, 1; + setp.eq.s32 %p86, %r219, 0; + @%p86 bra BB0_139; + + mov.f32 %f446, 0fBAB6061A; + mov.f32 %f447, 0f37CCF5CE; + fma.rn.f32 %f630, %f447, %f118, %f446; + bra.uni BB0_140; + +BB0_139: + mov.f32 %f448, 0f3C08839E; + mov.f32 %f449, 0fB94CA1F9; + fma.rn.f32 %f630, %f449, %f118, %f448; + +BB0_140: + @%p86 bra BB0_142; + + mov.f32 %f450, 0f3D2AAAA5; + fma.rn.f32 %f451, %f630, %f118, %f450; + mov.f32 %f452, 0fBF000000; + fma.rn.f32 %f631, %f451, %f118, %f452; + bra.uni BB0_143; + +BB0_142: + mov.f32 %f453, 0fBE2AAAA3; + fma.rn.f32 %f454, %f630, %f118, %f453; + fma.rn.f32 %f631, %f454, %f118, %f286; + +BB0_143: + fma.rn.f32 %f632, %f631, %f629, %f629; + @%p86 bra BB0_145; + + fma.rn.f32 %f632, %f631, %f118, %f281; + +BB0_145: + and.b32 %r611, %r781, 2; + setp.eq.s32 %p89, %r611, 0; + @%p89 bra BB0_147; + + mov.f32 %f458, 0fBF800000; + fma.rn.f32 %f632, %f632, %f458, %f286; + +BB0_147: + mul.f32 %f467, %f93, %f626; + mul.f32 %f468, %f467, %f467; + sub.f32 %f470, %f281, %f468; + mul.f32 %f471, %f93, %f632; + mul.f32 %f472, %f471, %f471; + sub.f32 %f473, %f470, %f472; + max.f32 %f475, %f286, %f473; + sqrt.rn.f32 %f476, %f475; + mul.f32 %f477, %f10, %f471; + mul.f32 %f478, %f11, %f471; + mul.f32 %f479, %f12, %f471; + fma.rn.f32 %f480, %f13, %f467, %f477; + fma.rn.f32 %f481, %f14, %f467, %f478; + fma.rn.f32 %f482, %f15, %f467, %f479; + fma.rn.f32 %f462, %f595, %f476, %f480; + fma.rn.f32 %f463, %f596, %f476, %f481; + fma.rn.f32 %f464, %f597, %f476, %f482; + abs.f32 %f483, %f462; + abs.f32 %f484, %f463; + abs.f32 %f485, %f464; + div.rn.f32 %f486, %f16, %f483; + div.rn.f32 %f487, %f17, %f484; + div.rn.f32 %f488, %f18, %f485; + abs.f32 %f489, %f486; + abs.f32 %f490, %f487; + abs.f32 %f491, %f488; + max.f32 %f492, %f489, %f490; + max.f32 %f493, %f492, %f491; + max.f32 %f465, %f493, %f199; + st.local.u32 [%rd14+8], %r409; + st.local.u32 [%rd14+4], %r409; + st.local.u32 [%rd14], %r409; + ld.global.u32 %r612, [root]; + // inline asm + call _rt_trace_64, (%r612, %f7, %f8, %f9, %f462, %f463, %f464, %r409, %f465, %f278, %rd80, %r301); + // inline asm + ld.local.f32 %f495, [%rd14+4]; + setp.leu.f32 %p90, %f495, 0f00000000; + @%p90 bra BB0_150; + + ld.local.f32 %f496, [%rd14]; + div.rn.f32 %f497, %f496, 0f41200000; + cvt.rzi.s32.f32 %r220, %f497; + setp.lt.s32 %p91, %r220, 0; + @%p91 bra BB0_150; + + cvt.s64.s32 %rd137, %r220; + mov.u64 %rd141, lmidLODs; + cvta.global.u64 %rd130, %rd141; + mov.u32 %r617, 1; + // inline asm + call (%rd129), _rt_buffer_get_64, (%rd130, %r617, %r294, %rd137, %rd56, %rd56, %rd56); + // inline asm + ld.local.f32 %f498, [%rd14+4]; + ld.f32 %f499, [%rd129]; + max.f32 %f500, %f499, %f498; + // inline asm + call (%rd135), _rt_buffer_get_64, (%rd130, %r617, %r294, %rd137, %rd56, %rd56, %rd56); + // inline asm + st.f32 [%rd135], %f500; + +BB0_150: + mad.lo.s32 %r619, %r148, 1664525, 1013904223; + and.b32 %r620, %r619, 16777215; + cvt.rn.f32.u32 %f501, %r620; + fma.rn.f32 %f502, %f501, 0f33800000, 0f3F800000; + mul.f32 %f503, %f502, 0f3F000000; + mad.lo.s32 %r621, %r619, 1664525, 7271263; + and.b32 %r622, %r621, 16777215; + cvt.rn.f32.u32 %f504, %r622; + fma.rn.f32 %f505, %f504, 0f33800000, 0f3F800000; + mul.f32 %f506, %f505, 0f3F000000; + sqrt.rn.f32 %f130, %f503; + mul.f32 %f640, %f506, 0f40C90FDB; + abs.f32 %f132, %f640; + setp.neu.f32 %p92, %f132, 0f7F800000; + mov.f32 %f634, %f640; + @%p92 bra BB0_152; + + mul.rn.f32 %f634, %f640, %f286; + +BB0_152: + mul.f32 %f508, %f634, 0f3F22F983; + cvt.rni.s32.f32 %r791, %f508; + cvt.rn.f32.s32 %f509, %r791; + neg.f32 %f510, %f509; + fma.rn.f32 %f512, %f510, %f229, %f634; + fma.rn.f32 %f514, %f510, %f231, %f512; + fma.rn.f32 %f635, %f510, %f233, %f514; + abs.f32 %f516, %f634; + setp.leu.f32 %p93, %f516, 0f47CE4780; + @%p93 bra BB0_163; + + mov.b32 %r222, %f634; + shr.u32 %r223, %r222, 23; + shl.b32 %r625, %r222, 8; + or.b32 %r224, %r625, -2147483648; + cvta.to.local.u64 %rd179, %rd72; + mov.u32 %r783, 0; + mov.u64 %rd178, __cudart_i2opi_f; + mov.u32 %r782, -6; + +BB0_154: + .pragma "nounroll"; + ld.const.u32 %r628, [%rd178]; + // inline asm + { + mad.lo.cc.u32 %r626, %r628, %r224, %r783; + madc.hi.u32 %r783, %r628, %r224, 0; + } + // inline asm + st.local.u32 [%rd179], %r626; + add.s64 %rd179, %rd179, 4; + add.s64 %rd178, %rd178, 4; + add.s32 %r782, %r782, 1; + setp.ne.s32 %p94, %r782, 0; + @%p94 bra BB0_154; + + and.b32 %r631, %r223, 255; + add.s32 %r632, %r631, -128; + shr.u32 %r633, %r632, 5; + and.b32 %r229, %r222, -2147483648; + cvta.to.local.u64 %rd145, %rd72; + st.local.u32 [%rd145+24], %r783; + mov.u32 %r634, 6; + sub.s32 %r635, %r634, %r633; + mul.wide.s32 %rd146, %r635, 4; + add.s64 %rd44, %rd145, %rd146; + ld.local.u32 %r784, [%rd44]; + ld.local.u32 %r785, [%rd44+-4]; + and.b32 %r232, %r223, 31; + setp.eq.s32 %p95, %r232, 0; + @%p95 bra BB0_157; + + mov.u32 %r636, 32; + sub.s32 %r637, %r636, %r232; + shr.u32 %r638, %r785, %r637; + shl.b32 %r639, %r784, %r232; + add.s32 %r784, %r638, %r639; + ld.local.u32 %r640, [%rd44+-8]; + shr.u32 %r641, %r640, %r637; + shl.b32 %r642, %r785, %r232; + add.s32 %r785, %r641, %r642; + +BB0_157: + shr.u32 %r643, %r785, 30; + shl.b32 %r644, %r784, 2; + add.s32 %r786, %r643, %r644; + shl.b32 %r238, %r785, 2; + shr.u32 %r645, %r786, 31; + shr.u32 %r646, %r784, 30; + add.s32 %r239, %r645, %r646; + setp.eq.s32 %p96, %r645, 0; + @%p96 bra BB0_158; + + not.b32 %r647, %r786; + neg.s32 %r788, %r238; + setp.eq.s32 %p97, %r238, 0; + selp.u32 %r648, 1, 0, %p97; + add.s32 %r786, %r648, %r647; + xor.b32 %r787, %r229, -2147483648; + bra.uni BB0_160; + +BB0_158: + mov.u32 %r787, %r229; + mov.u32 %r788, %r238; + +BB0_160: + clz.b32 %r790, %r786; + setp.eq.s32 %p98, %r790, 0; + shl.b32 %r649, %r786, %r790; + mov.u32 %r650, 32; + sub.s32 %r651, %r650, %r790; + shr.u32 %r652, %r788, %r651; + add.s32 %r653, %r652, %r649; + selp.b32 %r247, %r786, %r653, %p98; + mov.u32 %r654, -921707870; + mul.hi.u32 %r789, %r247, %r654; + setp.eq.s32 %p99, %r229, 0; + neg.s32 %r655, %r239; + selp.b32 %r791, %r239, %r655, %p99; + setp.lt.s32 %p100, %r789, 1; + @%p100 bra BB0_162; + + mul.lo.s32 %r656, %r247, -921707870; + shr.u32 %r657, %r656, 31; + shl.b32 %r658, %r789, 1; + add.s32 %r789, %r657, %r658; + add.s32 %r790, %r790, 1; + +BB0_162: + mov.u32 %r659, 126; + sub.s32 %r660, %r659, %r790; + shl.b32 %r661, %r660, 23; + add.s32 %r662, %r789, 1; + shr.u32 %r663, %r662, 7; + add.s32 %r664, %r663, 1; + shr.u32 %r665, %r664, 1; + add.s32 %r666, %r665, %r661; + or.b32 %r667, %r666, %r787; + mov.b32 %f635, %r667; + +BB0_163: + mul.rn.f32 %f138, %f635, %f635; + add.s32 %r255, %r791, 1; + and.b32 %r256, %r255, 1; + setp.eq.s32 %p101, %r256, 0; + @%p101 bra BB0_165; + + mov.f32 %f517, 0fBAB6061A; + mov.f32 %f518, 0f37CCF5CE; + fma.rn.f32 %f636, %f518, %f138, %f517; + bra.uni BB0_166; + +BB0_165: + mov.f32 %f519, 0f3C08839E; + mov.f32 %f520, 0fB94CA1F9; + fma.rn.f32 %f636, %f520, %f138, %f519; + +BB0_166: + @%p101 bra BB0_168; + + mov.f32 %f521, 0f3D2AAAA5; + fma.rn.f32 %f522, %f636, %f138, %f521; + mov.f32 %f523, 0fBF000000; + fma.rn.f32 %f637, %f522, %f138, %f523; + bra.uni BB0_169; + +BB0_168: + mov.f32 %f524, 0fBE2AAAA3; + fma.rn.f32 %f525, %f636, %f138, %f524; + fma.rn.f32 %f637, %f525, %f138, %f286; + +BB0_169: + fma.rn.f32 %f638, %f637, %f635, %f635; + @%p101 bra BB0_171; + + fma.rn.f32 %f638, %f637, %f138, %f281; + +BB0_171: + and.b32 %r668, %r255, 2; + setp.eq.s32 %p104, %r668, 0; + @%p104 bra BB0_173; + + mov.f32 %f529, 0fBF800000; + fma.rn.f32 %f638, %f638, %f529, %f286; + +BB0_173: + @%p92 bra BB0_175; + + mul.rn.f32 %f640, %f640, %f286; + +BB0_175: + mul.f32 %f531, %f640, 0f3F22F983; + cvt.rni.s32.f32 %r801, %f531; + cvt.rn.f32.s32 %f532, %r801; + neg.f32 %f533, %f532; + fma.rn.f32 %f535, %f533, %f229, %f640; + fma.rn.f32 %f537, %f533, %f231, %f535; + fma.rn.f32 %f641, %f533, %f233, %f537; + abs.f32 %f539, %f640; + setp.leu.f32 %p106, %f539, 0f47CE4780; + @%p106 bra BB0_186; + + mov.b32 %r258, %f640; + shr.u32 %r259, %r258, 23; + shl.b32 %r671, %r258, 8; + or.b32 %r260, %r671, -2147483648; + cvta.to.local.u64 %rd181, %rd72; + mov.u64 %rd180, __cudart_i2opi_f; + mov.u32 %r792, -6; + mov.u32 %r793, %r409; + +BB0_177: + .pragma "nounroll"; + ld.const.u32 %r674, [%rd180]; + // inline asm + { + mad.lo.cc.u32 %r672, %r674, %r260, %r793; + madc.hi.u32 %r793, %r674, %r260, 0; + } + // inline asm + st.local.u32 [%rd181], %r672; + add.s64 %rd181, %rd181, 4; + add.s64 %rd180, %rd180, 4; + add.s32 %r792, %r792, 1; + setp.ne.s32 %p107, %r792, 0; + @%p107 bra BB0_177; + + and.b32 %r677, %r259, 255; + add.s32 %r678, %r677, -128; + shr.u32 %r679, %r678, 5; + and.b32 %r265, %r258, -2147483648; + cvta.to.local.u64 %rd150, %rd72; + st.local.u32 [%rd150+24], %r793; + mov.u32 %r680, 6; + sub.s32 %r681, %r680, %r679; + mul.wide.s32 %rd151, %r681, 4; + add.s64 %rd50, %rd150, %rd151; + ld.local.u32 %r794, [%rd50]; + ld.local.u32 %r795, [%rd50+-4]; + and.b32 %r268, %r259, 31; + setp.eq.s32 %p108, %r268, 0; + @%p108 bra BB0_180; + + mov.u32 %r682, 32; + sub.s32 %r683, %r682, %r268; + shr.u32 %r684, %r795, %r683; + shl.b32 %r685, %r794, %r268; + add.s32 %r794, %r684, %r685; + ld.local.u32 %r686, [%rd50+-8]; + shr.u32 %r687, %r686, %r683; + shl.b32 %r688, %r795, %r268; + add.s32 %r795, %r687, %r688; + +BB0_180: + shr.u32 %r689, %r795, 30; + shl.b32 %r690, %r794, 2; + add.s32 %r796, %r689, %r690; + shl.b32 %r274, %r795, 2; + shr.u32 %r691, %r796, 31; + shr.u32 %r692, %r794, 30; + add.s32 %r275, %r691, %r692; + setp.eq.s32 %p109, %r691, 0; + @%p109 bra BB0_181; + + not.b32 %r693, %r796; + neg.s32 %r798, %r274; + setp.eq.s32 %p110, %r274, 0; + selp.u32 %r694, 1, 0, %p110; + add.s32 %r796, %r694, %r693; + xor.b32 %r797, %r265, -2147483648; + bra.uni BB0_183; + +BB0_181: + mov.u32 %r797, %r265; + mov.u32 %r798, %r274; + +BB0_183: + clz.b32 %r800, %r796; + setp.eq.s32 %p111, %r800, 0; + shl.b32 %r695, %r796, %r800; + mov.u32 %r696, 32; + sub.s32 %r697, %r696, %r800; + shr.u32 %r698, %r798, %r697; + add.s32 %r699, %r698, %r695; + selp.b32 %r283, %r796, %r699, %p111; + mov.u32 %r700, -921707870; + mul.hi.u32 %r799, %r283, %r700; + setp.eq.s32 %p112, %r265, 0; + neg.s32 %r701, %r275; + selp.b32 %r801, %r275, %r701, %p112; + setp.lt.s32 %p113, %r799, 1; + @%p113 bra BB0_185; + + mul.lo.s32 %r702, %r283, -921707870; + shr.u32 %r703, %r702, 31; + shl.b32 %r704, %r799, 1; + add.s32 %r799, %r703, %r704; + add.s32 %r800, %r800, 1; + +BB0_185: + mov.u32 %r705, 126; + sub.s32 %r706, %r705, %r800; + shl.b32 %r707, %r706, 23; + add.s32 %r708, %r799, 1; + shr.u32 %r709, %r708, 7; + add.s32 %r710, %r709, 1; + shr.u32 %r711, %r710, 1; + add.s32 %r712, %r711, %r707; + or.b32 %r713, %r712, %r797; + mov.b32 %f641, %r713; + +BB0_186: + mul.rn.f32 %f155, %f641, %f641; + and.b32 %r291, %r801, 1; + setp.eq.s32 %p114, %r291, 0; + @%p114 bra BB0_188; + + mov.f32 %f540, 0fBAB6061A; + mov.f32 %f541, 0f37CCF5CE; + fma.rn.f32 %f642, %f541, %f155, %f540; + bra.uni BB0_189; + +BB0_188: + mov.f32 %f542, 0f3C08839E; + mov.f32 %f543, 0fB94CA1F9; + fma.rn.f32 %f642, %f543, %f155, %f542; + +BB0_189: + @%p114 bra BB0_191; + + mov.f32 %f544, 0f3D2AAAA5; + fma.rn.f32 %f545, %f642, %f155, %f544; + mov.f32 %f546, 0fBF000000; + fma.rn.f32 %f643, %f545, %f155, %f546; + bra.uni BB0_192; + +BB0_191: + mov.f32 %f547, 0fBE2AAAA3; + fma.rn.f32 %f548, %f642, %f155, %f547; + fma.rn.f32 %f643, %f548, %f155, %f286; + +BB0_192: + fma.rn.f32 %f644, %f643, %f641, %f641; + @%p114 bra BB0_194; + + fma.rn.f32 %f644, %f643, %f155, %f281; + +BB0_194: + and.b32 %r714, %r801, 2; + setp.eq.s32 %p117, %r714, 0; + @%p117 bra BB0_196; + + mov.f32 %f552, 0fBF800000; + fma.rn.f32 %f644, %f644, %f552, %f286; + +BB0_196: + mul.f32 %f561, %f130, %f638; + mul.f32 %f562, %f561, %f561; + sub.f32 %f564, %f281, %f562; + mul.f32 %f565, %f130, %f644; + mul.f32 %f566, %f565, %f565; + sub.f32 %f567, %f564, %f566; + max.f32 %f569, %f286, %f567; + sqrt.rn.f32 %f570, %f569; + mul.f32 %f571, %f10, %f565; + mul.f32 %f572, %f11, %f565; + mul.f32 %f573, %f12, %f565; + fma.rn.f32 %f574, %f13, %f561, %f571; + fma.rn.f32 %f575, %f14, %f561, %f572; + fma.rn.f32 %f576, %f15, %f561, %f573; + fma.rn.f32 %f556, %f595, %f570, %f574; + fma.rn.f32 %f557, %f596, %f570, %f575; + fma.rn.f32 %f558, %f597, %f570, %f576; + abs.f32 %f577, %f556; + abs.f32 %f578, %f557; + abs.f32 %f579, %f558; + div.rn.f32 %f580, %f16, %f577; + div.rn.f32 %f581, %f17, %f578; + div.rn.f32 %f582, %f18, %f579; + abs.f32 %f583, %f580; + abs.f32 %f584, %f581; + abs.f32 %f585, %f582; + max.f32 %f586, %f583, %f584; + max.f32 %f587, %f586, %f585; + max.f32 %f559, %f587, %f199; + st.local.u32 [%rd14+8], %r409; + st.local.u32 [%rd14+4], %r409; + st.local.u32 [%rd14], %r409; + ld.global.u32 %r715, [root]; + // inline asm + call _rt_trace_64, (%r715, %f7, %f8, %f9, %f556, %f557, %f558, %r409, %f559, %f278, %rd80, %r301); + // inline asm + ld.local.f32 %f589, [%rd14+4]; + setp.leu.f32 %p118, %f589, 0f00000000; + @%p118 bra BB0_199; + + ld.local.f32 %f590, [%rd14]; + div.rn.f32 %f591, %f590, 0f41200000; + cvt.rzi.s32.f32 %r292, %f591; + setp.lt.s32 %p119, %r292, 0; + @%p119 bra BB0_199; + + cvt.s64.s32 %rd161, %r292; + mov.u64 %rd165, lmidLODs; + cvta.global.u64 %rd154, %rd165; + mov.u32 %r720, 1; + // inline asm + call (%rd153), _rt_buffer_get_64, (%rd154, %r720, %r294, %rd161, %rd56, %rd56, %rd56); + // inline asm + ld.local.f32 %f592, [%rd14+4]; + ld.f32 %f593, [%rd153]; + max.f32 %f594, %f593, %f592; + // inline asm + call (%rd159), _rt_buffer_get_64, (%rd154, %r720, %r294, %rd161, %rd56, %rd56, %rd56); + // inline asm + st.f32 [%rd159], %f594; + +BB0_199: + ret; +} + + |