diff options
author | tylermurphy534 <tylermurphy534@gmail.com> | 2022-11-06 15:12:42 -0500 |
---|---|---|
committer | tylermurphy534 <tylermurphy534@gmail.com> | 2022-11-06 15:12:42 -0500 |
commit | eb84bb298d2b95aec7b2ae12cbf25ac64f25379a (patch) | |
tree | efd616a157df06ab661c6d56651853431ac6b08b /VRCSDK3Worlds/Assets/Editor/x64/Bakery/addSH.ptx | |
download | unityprojects-eb84bb298d2b95aec7b2ae12cbf25ac64f25379a.tar.gz unityprojects-eb84bb298d2b95aec7b2ae12cbf25ac64f25379a.tar.bz2 unityprojects-eb84bb298d2b95aec7b2ae12cbf25ac64f25379a.zip |
move to self host
Diffstat (limited to 'VRCSDK3Worlds/Assets/Editor/x64/Bakery/addSH.ptx')
-rw-r--r-- | VRCSDK3Worlds/Assets/Editor/x64/Bakery/addSH.ptx | 666 |
1 files changed, 666 insertions, 0 deletions
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-23083092
// Cuda compilation tools, release 9.1, V9.1.85
// Based on LLVM 3.4svn
//

// Provenance: diff --git a/VRCSDK3Worlds/Assets/Editor/x64/Bakery/addSH.ptx
//             b/VRCSDK3Worlds/Assets/Editor/x64/Bakery/addSH.ptx
//             new file mode 100644, index 00000000..8dcce319, hunk @@ -0,0 +1,666 @@
//
// This is compiler output. Comments below describe what the instructions do;
// any change to the logic belongs in the CUDA source this was built from.

.version 6.1
.target sm_30
.address_size 64

    // .globl _Z6oxMainv

// ---------------------------------------------------------------------------
// Program variables and buffers referenced by symbol name.
// The entry point below reads pixelID and DoPack and indexes the eleven
// *Image* buffers; the remaining variables are declared but not used by it.
// ---------------------------------------------------------------------------
.global .align 8 .b8 pixelID[8];
.global .align 8 .b8 resolution[8];
.global .align 4 .b8 normal[12];
.global .align 4 .b8 camPos[12];
.global .align 4 .b8 root[4];
.global .align 4 .u32 imageEnabled;
.global .texref lightmap;
.global .align 16 .b8 tileInfo[16];
.global .align 4 .u32 additive;
.global .align 1 .b8 inputImageL0[1];
.global .align 1 .b8 inputImageL1x[1];
.global .align 1 .b8 inputImageL1y[1];
.global .align 1 .b8 inputImageL1z[1];
.global .align 1 .b8 outputImageL0[1];
.global .align 1 .b8 outputImageL1x[1];
.global .align 1 .b8 outputImageL1y[1];
.global .align 1 .b8 outputImageL1z[1];
.global .align 1 .b8 packedImageL1x[1];
.global .align 1 .b8 packedImageL1y[1];
.global .align 1 .b8 packedImageL1z[1];
.global .align 4 .f32 DoPack;

// Per-variable "typeinfo" records: the ASCII bytes "Ray", a NUL, then four
// bytes whose first value equals the declared byte size of the variable.
.global .align 4 .b8 _ZN21rti_internal_typeinfo7pixelIDE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo10resolutionE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6normalE[8] = {82, 97, 121, 0, 12, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6camPosE[8] = {82, 97, 121, 0, 12, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo4rootE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo12imageEnabledE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo8tileInfoE[8] = {82, 97, 121, 0, 16, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo8additiveE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6DoPackE[8] = {82, 97, 121, 0, 4, 0, 0, 0};

// rti_internal_register symbols; none of them is referenced by the entry point.
.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;

// Type names as NUL-terminated ASCII:
// "uint2", "uint2", "float3", "float3", "rtObject", "int", "uint4", "int", "float".
.global .align 8 .b8 _ZN21rti_internal_typename7pixelIDE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename10resolutionE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6normalE[7] = {102, 108, 111, 97, 116, 51, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6camPosE[7] = {102, 108, 111, 97, 116, 51, 0};
.global .align 16 .b8 _ZN21rti_internal_typename4rootE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
.global .align 4 .b8 _ZN21rti_internal_typename12imageEnabledE[4] = {105, 110, 116, 0};
.global .align 8 .b8 _ZN21rti_internal_typename8tileInfoE[6] = {117, 105, 110, 116, 52, 0};
.global .align 4 .b8 _ZN21rti_internal_typename8additiveE[4] = {105, 110, 116, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6DoPackE[6] = {102, 108, 111, 97, 116, 0};

// Type enum tags; every variable carries the same value.
.global .align 4 .u32 _ZN21rti_internal_typeenum7pixelIDE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum10resolutionE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6normalE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6camPosE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum4rootE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum12imageEnabledE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum8tileInfoE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum8additiveE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6DoPackE = 4919;

// Semantic strings as NUL-terminated ASCII:
// pixelID -> "rtLaunchIndex", resolution -> "rtLaunchDim",
// normal -> "attribute normal"; the others are single uninitialised bytes.
.global .align 16 .b8 _ZN21rti_internal_semantic7pixelIDE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic10resolutionE[12] = {114, 116, 76, 97, 117, 110, 99, 104, 68, 105, 109, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic6normalE[17] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 110, 111, 114, 109, 97, 108, 0};
.global .align 1 .b8 _ZN21rti_internal_semantic6camPosE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic4rootE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic12imageEnabledE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic8tileInfoE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic8additiveE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic6DoPackE[1];

// Annotation slots, one uninitialised byte each.
.global .align 1 .b8 _ZN23rti_internal_annotation7pixelIDE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation10resolutionE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6normalE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6camPosE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation4rootE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation12imageEnabledE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation8tileInfoE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation8additiveE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6DoPackE[1];

// ---------------------------------------------------------------------------
// _Z6oxMainv  (mangled form of oxMain(), no parameters)
//
// For the element selected by pixelID in each buffer:
//   1. load three f16 channels (byte offsets 0, 2, 4) from inputImageL0,
//      inputImageL1{x,y,z}, outputImageL0 and outputImageL1{x,y,z};
//      the L1 values are remapped with v * 2 - 1;
//   2. load the fourth f16 channel (offset 6) of both L0 images and keep the
//      smaller of the two in %f40;
//   3. add the two L0 values, form inL0 * inL1 + outL0 * outL1 per axis,
//      divide by max(L0 sum, 2^-23) and remap the quotient with q * 0.5 + 0.5;
//   4. scale everything by %f40 and store four f16 values to outputImageL0;
//   5. if DoPack > 0.5 store the three L1 results as four bytes each into
//      packedImageL1{x,y,z}; otherwise clamp them to [0, 1] and store them as
//      f16 into outputImageL1{x,y,z}.
//
// Long-lived registers:
//   %r53 = 2, %r54 = 8, %r186 = 4, %rd162 = 0   constant call arguments
//   %rd2/%rd20/%rd38/%rd56      generic addresses of inputImageL0/L1x/L1y/L1z
//   %rd74/%rd92/%rd110/%rd128   generic addresses of outputImageL0/L1x/L1y/L1z
//
// _rt_buffer_get_64 is not defined in this file. It is always called as
// (buffer, %r53, element-size constant, pixelID.x, pixelID.y, 0, 0) and the
// 64-bit result is used as the address of the element; presumably %r53 is the
// buffer dimensionality and the third argument the element size in bytes --
// TODO confirm against the OptiX headers.
// ---------------------------------------------------------------------------
.visible .entry _Z6oxMainv(

)
{
    .reg .pred %p<2>;
    .reg .b16 %rs<51>;
    .reg .f32 %f<132>;
    .reg .b32 %r<209>;
    .reg .b64 %rd<213>;

    // ---- inputImageL0 channels 0..2 -> %f11, %f12, %f13 ----
    ld.global.v2.u32 {%r55, %r56}, [pixelID];
    cvt.u64.u32 %rd3, %r55;
    cvt.u64.u32 %rd4, %r56;
    mov.u64 %rd163, inputImageL0;
    cvta.global.u64 %rd2, %rd163;
    mov.u32 %r53, 2;
    mov.u32 %r54, 8;
    mov.u64 %rd162, 0;
    call (%rd1), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd3, %rd4, %rd162, %rd162);
    ld.u16 %rs2, [%rd1];
    { cvt.f32.f16 %f11, %rs2;}

    ld.global.v2.u32 {%r59, %r60}, [pixelID];
    cvt.u64.u32 %rd9, %r59;
    cvt.u64.u32 %rd10, %r60;
    call (%rd7), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd9, %rd10, %rd162, %rd162);
    ld.u16 %rs3, [%rd7+2];
    { cvt.f32.f16 %f12, %rs3;}

    ld.global.v2.u32 {%r63, %r64}, [pixelID];
    cvt.u64.u32 %rd15, %r63;
    cvt.u64.u32 %rd16, %r64;
    call (%rd13), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd15, %rd16, %rd162, %rd162);
    ld.u16 %rs4, [%rd13+4];
    { cvt.f32.f16 %f13, %rs4;}

    // ---- inputImageL1x channels 0..2 -> %f14..%f16, remapped into %f41..%f43 ----
    ld.global.v2.u32 {%r67, %r68}, [pixelID];
    cvt.u64.u32 %rd21, %r67;
    cvt.u64.u32 %rd22, %r68;
    mov.u64 %rd164, inputImageL1x;
    cvta.global.u64 %rd20, %rd164;
    call (%rd19), _rt_buffer_get_64, (%rd20, %r53, %r54, %rd21, %rd22, %rd162, %rd162);
    ld.u16 %rs5, [%rd19];
    { cvt.f32.f16 %f14, %rs5;}

    ld.global.v2.u32 {%r71, %r72}, [pixelID];
    cvt.u64.u32 %rd27, %r71;
    cvt.u64.u32 %rd28, %r72;
    call (%rd25), _rt_buffer_get_64, (%rd20, %r53, %r54, %rd27, %rd28, %rd162, %rd162);
    ld.u16 %rs6, [%rd25+2];
    { cvt.f32.f16 %f15, %rs6;}

    ld.global.v2.u32 {%r75, %r76}, [pixelID];
    cvt.u64.u32 %rd33, %r75;
    cvt.u64.u32 %rd34, %r76;
    call (%rd31), _rt_buffer_get_64, (%rd20, %r53, %r54, %rd33, %rd34, %rd162, %rd162);
    ld.u16 %rs7, [%rd31+4];
    { cvt.f32.f16 %f16, %rs7;}

    // v * 2.0 + (-1.0)
    fma.rn.f32 %f41, %f14, 0f40000000, 0fBF800000;
    fma.rn.f32 %f42, %f15, 0f40000000, 0fBF800000;
    fma.rn.f32 %f43, %f16, 0f40000000, 0fBF800000;

    // ---- inputImageL1y channels 0..2 -> %f17..%f19, remapped into %f44..%f46 ----
    ld.global.v2.u32 {%r79, %r80}, [pixelID];
    cvt.u64.u32 %rd39, %r79;
    cvt.u64.u32 %rd40, %r80;
    mov.u64 %rd165, inputImageL1y;
    cvta.global.u64 %rd38, %rd165;
    call (%rd37), _rt_buffer_get_64, (%rd38, %r53, %r54, %rd39, %rd40, %rd162, %rd162);
    ld.u16 %rs8, [%rd37];
    { cvt.f32.f16 %f17, %rs8;}

    ld.global.v2.u32 {%r83, %r84}, [pixelID];
    cvt.u64.u32 %rd45, %r83;
    cvt.u64.u32 %rd46, %r84;
    call (%rd43), _rt_buffer_get_64, (%rd38, %r53, %r54, %rd45, %rd46, %rd162, %rd162);
    ld.u16 %rs9, [%rd43+2];
    { cvt.f32.f16 %f18, %rs9;}

    ld.global.v2.u32 {%r87, %r88}, [pixelID];
    cvt.u64.u32 %rd51, %r87;
    cvt.u64.u32 %rd52, %r88;
    call (%rd49), _rt_buffer_get_64, (%rd38, %r53, %r54, %rd51, %rd52, %rd162, %rd162);
    ld.u16 %rs10, [%rd49+4];
    { cvt.f32.f16 %f19, %rs10;}

    fma.rn.f32 %f44, %f17, 0f40000000, 0fBF800000;
    fma.rn.f32 %f45, %f18, 0f40000000, 0fBF800000;
    fma.rn.f32 %f46, %f19, 0f40000000, 0fBF800000;

    // ---- inputImageL1z channels 0..2 -> %f20..%f22, remapped into %f47..%f49 ----
    ld.global.v2.u32 {%r91, %r92}, [pixelID];
    cvt.u64.u32 %rd57, %r91;
    cvt.u64.u32 %rd58, %r92;
    mov.u64 %rd166, inputImageL1z;
    cvta.global.u64 %rd56, %rd166;
    call (%rd55), _rt_buffer_get_64, (%rd56, %r53, %r54, %rd57, %rd58, %rd162, %rd162);
    ld.u16 %rs11, [%rd55];
    { cvt.f32.f16 %f20, %rs11;}

    ld.global.v2.u32 {%r95, %r96}, [pixelID];
    cvt.u64.u32 %rd63, %r95;
    cvt.u64.u32 %rd64, %r96;
    call (%rd61), _rt_buffer_get_64, (%rd56, %r53, %r54, %rd63, %rd64, %rd162, %rd162);
    ld.u16 %rs12, [%rd61+2];
    { cvt.f32.f16 %f21, %rs12;}

    ld.global.v2.u32 {%r99, %r100}, [pixelID];
    cvt.u64.u32 %rd69, %r99;
    cvt.u64.u32 %rd70, %r100;
    call (%rd67), _rt_buffer_get_64, (%rd56, %r53, %r54, %rd69, %rd70, %rd162, %rd162);
    ld.u16 %rs13, [%rd67+4];
    { cvt.f32.f16 %f22, %rs13;}

    fma.rn.f32 %f47, %f20, 0f40000000, 0fBF800000;
    fma.rn.f32 %f48, %f21, 0f40000000, 0fBF800000;
    fma.rn.f32 %f49, %f22, 0f40000000, 0fBF800000;

    // ---- outputImageL0 channels 0..2 (current contents) -> %f23, %f24, %f25 ----
    ld.global.v2.u32 {%r103, %r104}, [pixelID];
    cvt.u64.u32 %rd75, %r103;
    cvt.u64.u32 %rd76, %r104;
    mov.u64 %rd167, outputImageL0;
    cvta.global.u64 %rd74, %rd167;
    call (%rd73), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd75, %rd76, %rd162, %rd162);
    ld.u16 %rs14, [%rd73];
    { cvt.f32.f16 %f23, %rs14;}

    ld.global.v2.u32 {%r107, %r108}, [pixelID];
    cvt.u64.u32 %rd81, %r107;
    cvt.u64.u32 %rd82, %r108;
    call (%rd79), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd81, %rd82, %rd162, %rd162);
    ld.u16 %rs15, [%rd79+2];
    { cvt.f32.f16 %f24, %rs15;}

    ld.global.v2.u32 {%r111, %r112}, [pixelID];
    cvt.u64.u32 %rd87, %r111;
    cvt.u64.u32 %rd88, %r112;
    call (%rd85), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd87, %rd88, %rd162, %rd162);
    ld.u16 %rs16, [%rd85+4];
    { cvt.f32.f16 %f25, %rs16;}

    // ---- outputImageL1x channels 0..2 -> %f26..%f28; remap, then * outL0 -> %f53..%f55 ----
    ld.global.v2.u32 {%r115, %r116}, [pixelID];
    cvt.u64.u32 %rd93, %r115;
    cvt.u64.u32 %rd94, %r116;
    mov.u64 %rd168, outputImageL1x;
    cvta.global.u64 %rd92, %rd168;
    call (%rd91), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd93, %rd94, %rd162, %rd162);
    ld.u16 %rs17, [%rd91];
    { cvt.f32.f16 %f26, %rs17;}

    ld.global.v2.u32 {%r119, %r120}, [pixelID];
    cvt.u64.u32 %rd99, %r119;
    cvt.u64.u32 %rd100, %r120;
    call (%rd97), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd99, %rd100, %rd162, %rd162);
    ld.u16 %rs18, [%rd97+2];
    { cvt.f32.f16 %f27, %rs18;}

    ld.global.v2.u32 {%r123, %r124}, [pixelID];
    cvt.u64.u32 %rd105, %r123;
    cvt.u64.u32 %rd106, %r124;
    call (%rd103), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd105, %rd106, %rd162, %rd162);
    ld.u16 %rs19, [%rd103+4];
    { cvt.f32.f16 %f28, %rs19;}

    fma.rn.f32 %f50, %f26, 0f40000000, 0fBF800000;
    fma.rn.f32 %f51, %f27, 0f40000000, 0fBF800000;
    fma.rn.f32 %f52, %f28, 0f40000000, 0fBF800000;
    mul.f32 %f53, %f23, %f50;
    mul.f32 %f54, %f24, %f51;
    mul.f32 %f55, %f25, %f52;

    // ---- outputImageL1y channels 0..2 -> %f29..%f31; remap, then * outL0 -> %f59..%f61 ----
    ld.global.v2.u32 {%r127, %r128}, [pixelID];
    cvt.u64.u32 %rd111, %r127;
    cvt.u64.u32 %rd112, %r128;
    mov.u64 %rd169, outputImageL1y;
    cvta.global.u64 %rd110, %rd169;
    call (%rd109), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd111, %rd112, %rd162, %rd162);
    ld.u16 %rs20, [%rd109];
    { cvt.f32.f16 %f29, %rs20;}

    ld.global.v2.u32 {%r131, %r132}, [pixelID];
    cvt.u64.u32 %rd117, %r131;
    cvt.u64.u32 %rd118, %r132;
    call (%rd115), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd117, %rd118, %rd162, %rd162);
    ld.u16 %rs21, [%rd115+2];
    { cvt.f32.f16 %f30, %rs21;}

    ld.global.v2.u32 {%r135, %r136}, [pixelID];
    cvt.u64.u32 %rd123, %r135;
    cvt.u64.u32 %rd124, %r136;
    call (%rd121), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd123, %rd124, %rd162, %rd162);
    ld.u16 %rs22, [%rd121+4];
    { cvt.f32.f16 %f31, %rs22;}

    fma.rn.f32 %f56, %f29, 0f40000000, 0fBF800000;
    fma.rn.f32 %f57, %f30, 0f40000000, 0fBF800000;
    fma.rn.f32 %f58, %f31, 0f40000000, 0fBF800000;
    mul.f32 %f59, %f23, %f56;
    mul.f32 %f60, %f24, %f57;
    mul.f32 %f61, %f25, %f58;

    // ---- outputImageL1z channels 0..2 -> %f32..%f34; remap, then * outL0 -> %f65..%f67 ----
    ld.global.v2.u32 {%r139, %r140}, [pixelID];
    cvt.u64.u32 %rd129, %r139;
    cvt.u64.u32 %rd130, %r140;
    mov.u64 %rd170, outputImageL1z;
    cvta.global.u64 %rd128, %rd170;
    call (%rd127), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd129, %rd130, %rd162, %rd162);
    ld.u16 %rs23, [%rd127];
    { cvt.f32.f16 %f32, %rs23;}

    ld.global.v2.u32 {%r143, %r144}, [pixelID];
    cvt.u64.u32 %rd135, %r143;
    cvt.u64.u32 %rd136, %r144;
    call (%rd133), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd135, %rd136, %rd162, %rd162);
    ld.u16 %rs24, [%rd133+2];
    { cvt.f32.f16 %f33, %rs24;}

    ld.global.v2.u32 {%r147, %r148}, [pixelID];
    cvt.u64.u32 %rd141, %r147;
    cvt.u64.u32 %rd142, %r148;
    call (%rd139), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd141, %rd142, %rd162, %rd162);
    ld.u16 %rs25, [%rd139+4];
    { cvt.f32.f16 %f34, %rs25;}

    fma.rn.f32 %f62, %f32, 0f40000000, 0fBF800000;
    fma.rn.f32 %f63, %f33, 0f40000000, 0fBF800000;
    fma.rn.f32 %f64, %f34, 0f40000000, 0fBF800000;
    mul.f32 %f65, %f23, %f62;
    mul.f32 %f66, %f24, %f63;
    mul.f32 %f67, %f25, %f64;

    // ---- fourth channel (offset 6) of inputImageL0 and outputImageL0; keep the smaller ----
    ld.global.v2.u32 {%r151, %r152}, [pixelID];
    cvt.u64.u32 %rd147, %r151;
    cvt.u64.u32 %rd148, %r152;
    call (%rd145), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd147, %rd148, %rd162, %rd162);
    ld.u16 %rs26, [%rd145+6];
    { cvt.f32.f16 %f35, %rs26;}

    ld.global.v2.u32 {%r155, %r156}, [pixelID];
    cvt.u64.u32 %rd153, %r155;
    cvt.u64.u32 %rd154, %r156;
    call (%rd151), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd153, %rd154, %rd162, %rd162);
    ld.u16 %rs27, [%rd151+6];
    { cvt.f32.f16 %f36, %rs27;}

    min.f32 %f40, %f35, %f36;

    // ---- combine: L0 sum in %f68..%f70, per-axis inL0*inL1 + outL0*outL1 in %f71..%f79 ----
    add.f32 %f68, %f11, %f23;
    add.f32 %f69, %f12, %f24;
    add.f32 %f70, %f13, %f25;
    fma.rn.f32 %f71, %f11, %f41, %f53;
    fma.rn.f32 %f72, %f12, %f42, %f54;
    fma.rn.f32 %f73, %f13, %f43, %f55;
    fma.rn.f32 %f74, %f11, %f44, %f59;
    fma.rn.f32 %f75, %f12, %f45, %f60;
    fma.rn.f32 %f76, %f13, %f46, %f61;
    fma.rn.f32 %f77, %f11, %f47, %f65;
    fma.rn.f32 %f78, %f12, %f48, %f66;
    fma.rn.f32 %f79, %f13, %f49, %f67;

    // Divisor is the L0 sum floored at 0f34000000 (2^-23, about 1.19e-7), which
    // keeps the divisions below away from a zero denominator.
    mov.f32 %f80, 0f34000000;
    max.f32 %f81, %f68, %f80;
    max.f32 %f82, %f69, %f80;
    max.f32 %f83, %f70, %f80;

    // x axis: quotient, then q * 0.5 + 0.5
    div.rn.f32 %f84, %f71, %f81;
    div.rn.f32 %f85, %f72, %f82;
    div.rn.f32 %f86, %f73, %f83;
    fma.rn.f32 %f87, %f84, 0f3F000000, 0f3F000000;
    fma.rn.f32 %f88, %f85, 0f3F000000, 0f3F000000;
    fma.rn.f32 %f89, %f86, 0f3F000000, 0f3F000000;

    // y axis
    div.rn.f32 %f90, %f74, %f81;
    div.rn.f32 %f91, %f75, %f82;
    div.rn.f32 %f92, %f76, %f83;
    fma.rn.f32 %f93, %f90, 0f3F000000, 0f3F000000;
    fma.rn.f32 %f94, %f91, 0f3F000000, 0f3F000000;
    fma.rn.f32 %f95, %f92, 0f3F000000, 0f3F000000;

    // z axis
    div.rn.f32 %f96, %f77, %f81;
    div.rn.f32 %f97, %f78, %f82;
    div.rn.f32 %f98, %f79, %f83;
    fma.rn.f32 %f99, %f96, 0f3F000000, 0f3F000000;
    fma.rn.f32 %f100, %f97, 0f3F000000, 0f3F000000;
    fma.rn.f32 %f101, %f98, 0f3F000000, 0f3F000000;

    // Scale every result by the shared fourth-channel value %f40.
    mul.f32 %f37, %f68, %f40;
    mul.f32 %f38, %f69, %f40;
    mul.f32 %f39, %f70, %f40;
    mul.f32 %f2, %f40, %f87;
    mul.f32 %f3, %f40, %f88;
    mul.f32 %f4, %f40, %f89;
    mul.f32 %f5, %f40, %f93;
    mul.f32 %f6, %f40, %f94;
    mul.f32 %f7, %f40, %f95;
    mul.f32 %f8, %f40, %f99;
    mul.f32 %f9, %f40, %f100;
    mul.f32 %f10, %f40, %f101;

    // ---- store {%f37, %f38, %f39, %f40} as four f16 values into outputImageL0 ----
    ld.global.v2.u32 {%r159, %r160}, [pixelID];
    cvt.u64.u32 %rd159, %r159;
    cvt.u64.u32 %rd160, %r160;
    call (%rd157), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd159, %rd160, %rd162, %rd162);
    { cvt.rn.f16.f32 %rs31, %f40;}
    { cvt.rn.f16.f32 %rs30, %f39;}
    { cvt.rn.f16.f32 %rs29, %f38;}
    { cvt.rn.f16.f32 %rs28, %f37;}
    st.v4.u16 [%rd157], {%rs28, %rs29, %rs30, %rs31};

    // ---- choose the output form: DoPack > 0.5 selects the byte-packed path ----
    ld.global.f32 %f102, [DoPack];
    setp.gt.f32 %p1, %f102, 0f3F000000;
    @%p1 bra BB0_2;
    bra.uni BB0_1;

BB0_2:
    // Packed path: each L1 value becomes min(v * 255.0, 255.0), truncated
    // toward zero to an integer, and is stored as one byte.
    mul.f32 %f112, %f2, 0f437F0000;
    mov.f32 %f113, 0f437F0000;
    min.f32 %f114, %f112, %f113;
    mul.f32 %f115, %f3, 0f437F0000;
    min.f32 %f116, %f115, %f113;
    mul.f32 %f117, %f4, 0f437F0000;
    min.f32 %f118, %f117, %f113;
    mul.f32 %f119, %f5, 0f437F0000;
    min.f32 %f120, %f119, %f113;
    mul.f32 %f121, %f6, 0f437F0000;
    min.f32 %f122, %f121, %f113;
    mul.f32 %f123, %f7, 0f437F0000;
    min.f32 %f124, %f123, %f113;
    mul.f32 %f125, %f8, 0f437F0000;
    min.f32 %f126, %f125, %f113;
    mul.f32 %f127, %f9, 0f437F0000;
    min.f32 %f128, %f127, %f113;
    mul.f32 %f129, %f10, 0f437F0000;
    min.f32 %f130, %f129, %f113;

    // packedImageL1x: the element-size argument is %r186 = 4 on this path.
    ld.global.v2.u32 {%r187, %r188}, [pixelID];
    cvt.u64.u32 %rd194, %r187;
    cvt.u64.u32 %rd195, %r188;
    mov.u64 %rd210, packedImageL1x;
    cvta.global.u64 %rd193, %rd210;
    mov.u32 %r186, 4;
    call (%rd192), _rt_buffer_get_64, (%rd193, %r53, %r186, %rd194, %rd195, %rd162, %rd162);
    cvt.rzi.u32.f32 %r191, %f114;
    cvt.rzi.u32.f32 %r192, %f116;
    cvt.rzi.u32.f32 %r193, %f118;
    // NOTE(review): the fourth byte is %f40 * 255.0 with no min() against 255.0,
    // unlike the nine values above; st.v4.u8 writes one byte per component, so
    // a %f40 above 1.0 would not come out as 255 -- confirm that the fourth
    // channel of both L0 images can never exceed 1.0.
    mul.f32 %f131, %f40, 0f437F0000;
    cvt.rzi.u32.f32 %r194, %f131;
    cvt.u16.u32 %rs41, %r193;
    cvt.u16.u32 %rs42, %r192;
    cvt.u16.u32 %rs43, %r191;
    cvt.u16.u32 %rs44, %r194;
    st.v4.u8 [%rd192], {%rs43, %rs42, %rs41, %rs44};

    // packedImageL1y, reusing the fourth byte %rs44.
    ld.global.v2.u32 {%r195, %r196}, [pixelID];
    cvt.u64.u32 %rd200, %r195;
    cvt.u64.u32 %rd201, %r196;
    mov.u64 %rd211, packedImageL1y;
    cvta.global.u64 %rd199, %rd211;
    call (%rd198), _rt_buffer_get_64, (%rd199, %r53, %r186, %rd200, %rd201, %rd162, %rd162);
    cvt.rzi.u32.f32 %r199, %f120;
    cvt.rzi.u32.f32 %r200, %f122;
    cvt.rzi.u32.f32 %r201, %f124;
    cvt.u16.u32 %rs45, %r201;
    cvt.u16.u32 %rs46, %r200;
    cvt.u16.u32 %rs47, %r199;
    st.v4.u8 [%rd198], {%rs47, %rs46, %rs45, %rs44};

    // packedImageL1z, reusing the fourth byte %rs44.
    ld.global.v2.u32 {%r202, %r203}, [pixelID];
    cvt.u64.u32 %rd206, %r202;
    cvt.u64.u32 %rd207, %r203;
    mov.u64 %rd212, packedImageL1z;
    cvta.global.u64 %rd205, %rd212;
    call (%rd204), _rt_buffer_get_64, (%rd205, %r53, %r186, %rd206, %rd207, %rd162, %rd162);
    cvt.rzi.u32.f32 %r206, %f126;
    cvt.rzi.u32.f32 %r207, %f128;
    cvt.rzi.u32.f32 %r208, %f130;
    cvt.u16.u32 %rs48, %r208;
    cvt.u16.u32 %rs49, %r207;
    cvt.u16.u32 %rs50, %r206;
    st.v4.u8 [%rd204], {%rs50, %rs49, %rs48, %rs44};
    bra.uni BB0_3;

BB0_1:
    // Unpacked path: clamp each L1 value to [0, 1] (cvt.sat), convert to f16
    // and store it next to the f16 fourth channel %rs31 computed above.

    // outputImageL1x
    ld.global.v2.u32 {%r169, %r170}, [pixelID];
    cvt.u64.u32 %rd173, %r169;
    cvt.u64.u32 %rd174, %r170;
    call (%rd171), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd173, %rd174, %rd162, %rd162);
    cvt.sat.f32.f32 %f103, %f2;
    cvt.sat.f32.f32 %f104, %f3;
    cvt.sat.f32.f32 %f105, %f4;
    { cvt.rn.f16.f32 %rs34, %f105;}
    { cvt.rn.f16.f32 %rs33, %f104;}
    { cvt.rn.f16.f32 %rs32, %f103;}
    st.v4.u16 [%rd171], {%rs32, %rs33, %rs34, %rs31};

    // outputImageL1y
    ld.global.v2.u32 {%r173, %r174}, [pixelID];
    cvt.u64.u32 %rd179, %r173;
    cvt.u64.u32 %rd180, %r174;
    call (%rd177), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd179, %rd180, %rd162, %rd162);
    cvt.sat.f32.f32 %f106, %f5;
    cvt.sat.f32.f32 %f107, %f6;
    cvt.sat.f32.f32 %f108, %f7;
    { cvt.rn.f16.f32 %rs37, %f108;}
    { cvt.rn.f16.f32 %rs36, %f107;}
    { cvt.rn.f16.f32 %rs35, %f106;}
    st.v4.u16 [%rd177], {%rs35, %rs36, %rs37, %rs31};

    // outputImageL1z
    ld.global.v2.u32 {%r177, %r178}, [pixelID];
    cvt.u64.u32 %rd185, %r177;
    cvt.u64.u32 %rd186, %r178;
    call (%rd183), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd185, %rd186, %rd162, %rd162);
    cvt.sat.f32.f32 %f109, %f8;
    cvt.sat.f32.f32 %f110, %f9;
    cvt.sat.f32.f32 %f111, %f10;
    { cvt.rn.f16.f32 %rs40, %f111;}
    { cvt.rn.f16.f32 %rs39, %f110;}
    { cvt.rn.f16.f32 %rs38, %f109;}
    st.v4.u16 [%rd183], {%rs38, %rs39, %rs40, %rs31};

BB0_3:
    ret;
}