diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..51442de --- /dev/null +++ b/.editorconfig @@ -0,0 +1,73 @@ +# see http://editorconfig.org/ for docs on this file + +root = true + +[*] +# help with sharing files across os's (i.e. network share or through local vm) +end_of_line = lf +#charset temporarily disabled due to bug in VS2017 changing to UTF-8 with BOM (https://favro.com/card/c564ede4ed3337f7b17986b6/Uni-17877) +#charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +# formattable file extensions (keep in sync with format.ini from unity-meta repo) +# +# Note: We need to split the formattable files configs into shorter duplicate entries (logically grouped) +# due to known issue in VS editorconfig extension where there is a limit of 51 characters (empirically determined). +# see: https://github.com/editorconfig/editorconfig-visualstudio/issues/21 +# +## uncrustify +[*.{c,h,cpp,hpp,m,mm,cc,cs}] +indent_style = space +indent_size = 4 + +## generic formatter (shaders) +[*.{cg,cginc,glslinc,hlsl,shader,y,ypp,yy}] +indent_style = space +indent_size = 4 + +## generic formatter (misc) +[*.{asm,s,S,pch,pchmm,java,sh,uss}] +indent_style = space +indent_size = 4 + +## perltidy +[*.{pl,pm,t,it}] +indent_style = space +indent_size = 4 + +## unity special +[*.{bindings,mem.xml}] +indent_style = space +indent_size = 4 + +# other filetypes we want to overwrite default configuration to preserve the standard +[{Makefile,makefile}] +# TAB characters are part of the Makefile format +indent_style = tab + +[*.{md,markdown}] +# trailing whitespace is significant in markdown (bad choice, bad!) +trim_trailing_whitespace = false + +# keep these and the VS stuff below in sync with .hgeol's CRLF extensions +[*.{vcproj,bat,cmd,xaml,tt,t4,ttinclude}] +end_of_line = crlf + +# this VS-specific stuff is based on experiments to see how VS will modify a file after it has been manually edited. 
+# the settings are meant to closely match what VS does to minimize unnecessary diffs. this duplicates some settings in * +# but let's be explicit here to be safe (in case someone wants to copy-paste this out to another .editorconfig). +[*.{vcxproj,vcxproj.filters,csproj,props,targets}] +indent_style = space +indent_size = 2 +end_of_line = crlf +charset = utf-8-bom +trim_trailing_whitespace = true +insert_final_newline = false +[*.{sln,sln.template}] +indent_style = tab +indent_size = 4 +end_of_line = crlf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = false diff --git a/CMakeLists.txt b/CMakeLists.txt index 782757d..009f5eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,9 +50,9 @@ target_include_directories(hlslcc $ $) set_target_properties(hlslcc PROPERTIES - CXX_STANDARD 11 - CXX_STANDARD_REQUIRED ON - PUBLIC_HEADERS ${HLSLCC_HDRS}) + "CXX_STANDARD" "11" + "CXX_STANDARD_REQUIRED" "ON" + "PUBLIC_HEADERS" "${HLSLCC_HDRS}") install(TARGETS hlslcc EXPORT hlslccConfig ARCHIVE DESTINATION lib diff --git a/README.md b/README.md index 869383b..9fe4a5b 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Originally based on https://github.com/James-Jones/HLSLCrossCompiler. 
This library takes DirectX bytecode as input, and translates it into the following languages: - GLSL (OpenGL 3.2 and later) -- GLSL ES (OpenGL ES 3.0 and later) +- GLSL ES (OpenGL ES 2.0 and later) - GLSL for Vulkan consumption (as input for Glslang to generate SPIR-V) - Metal Shading Language @@ -46,6 +46,8 @@ The main entry point is TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX - Florian Penzkofer - Alexey Orlov - Povilas Kanapickas +- Aleksandr Kirillov +- Kay Chang ## License diff --git a/include/ShaderInfo.h b/include/ShaderInfo.h index 30791b3..dbaf258 100644 --- a/include/ShaderInfo.h +++ b/include/ShaderInfo.h @@ -10,398 +10,401 @@ //Reflection #define MAX_RESOURCE_BINDINGS 256 -typedef enum _SHADER_VARIABLE_TYPE { - SVT_VOID = 0, - SVT_BOOL = 1, - SVT_INT = 2, - SVT_FLOAT = 3, - SVT_STRING = 4, - SVT_TEXTURE = 5, - SVT_TEXTURE1D = 6, - SVT_TEXTURE2D = 7, - SVT_TEXTURE3D = 8, - SVT_TEXTURECUBE = 9, - SVT_SAMPLER = 10, - SVT_PIXELSHADER = 15, - SVT_VERTEXSHADER = 16, - SVT_UINT = 19, - SVT_UINT8 = 20, - SVT_GEOMETRYSHADER = 21, - SVT_RASTERIZER = 22, - SVT_DEPTHSTENCIL = 23, - SVT_BLEND = 24, - SVT_BUFFER = 25, - SVT_CBUFFER = 26, - SVT_TBUFFER = 27, - SVT_TEXTURE1DARRAY = 28, - SVT_TEXTURE2DARRAY = 29, - SVT_RENDERTARGETVIEW = 30, - SVT_DEPTHSTENCILVIEW = 31, - SVT_TEXTURE2DMS = 32, - SVT_TEXTURE2DMSARRAY = 33, - SVT_TEXTURECUBEARRAY = 34, - SVT_HULLSHADER = 35, - SVT_DOMAINSHADER = 36, - SVT_INTERFACE_POINTER = 37, - SVT_COMPUTESHADER = 38, - SVT_DOUBLE = 39, - SVT_RWTEXTURE1D = 40, - SVT_RWTEXTURE1DARRAY = 41, - SVT_RWTEXTURE2D = 42, - SVT_RWTEXTURE2DARRAY = 43, - SVT_RWTEXTURE3D = 44, - SVT_RWBUFFER = 45, - SVT_BYTEADDRESS_BUFFER = 46, - SVT_RWBYTEADDRESS_BUFFER = 47, - SVT_STRUCTURED_BUFFER = 48, - SVT_RWSTRUCTURED_BUFFER = 49, - SVT_APPEND_STRUCTURED_BUFFER = 50, - SVT_CONSUME_STRUCTURED_BUFFER = 51, - - - - // Only used as a marker when analyzing register types - SVT_FORCED_INT = 152, - // Integer that can be either signed or unsigned. 
Only used as an intermediate step when doing data type analysis - SVT_INT_AMBIGUOUS = 153, - - // Partial precision types. Used when doing type analysis - SVT_FLOAT10 = 53, // Seems to be used in constant buffers - SVT_FLOAT16 = 54, - SVT_INT16 = 156, - SVT_INT12 = 157, - SVT_UINT16 = 158, - - SVT_FORCE_DWORD = 0x7fffffff +typedef enum _SHADER_VARIABLE_TYPE +{ + SVT_VOID = 0, + SVT_BOOL = 1, + SVT_INT = 2, + SVT_FLOAT = 3, + SVT_STRING = 4, + SVT_TEXTURE = 5, + SVT_TEXTURE1D = 6, + SVT_TEXTURE2D = 7, + SVT_TEXTURE3D = 8, + SVT_TEXTURECUBE = 9, + SVT_SAMPLER = 10, + SVT_PIXELSHADER = 15, + SVT_VERTEXSHADER = 16, + SVT_UINT = 19, + SVT_UINT8 = 20, + SVT_GEOMETRYSHADER = 21, + SVT_RASTERIZER = 22, + SVT_DEPTHSTENCIL = 23, + SVT_BLEND = 24, + SVT_BUFFER = 25, + SVT_CBUFFER = 26, + SVT_TBUFFER = 27, + SVT_TEXTURE1DARRAY = 28, + SVT_TEXTURE2DARRAY = 29, + SVT_RENDERTARGETVIEW = 30, + SVT_DEPTHSTENCILVIEW = 31, + SVT_TEXTURE2DMS = 32, + SVT_TEXTURE2DMSARRAY = 33, + SVT_TEXTURECUBEARRAY = 34, + SVT_HULLSHADER = 35, + SVT_DOMAINSHADER = 36, + SVT_INTERFACE_POINTER = 37, + SVT_COMPUTESHADER = 38, + SVT_DOUBLE = 39, + SVT_RWTEXTURE1D = 40, + SVT_RWTEXTURE1DARRAY = 41, + SVT_RWTEXTURE2D = 42, + SVT_RWTEXTURE2DARRAY = 43, + SVT_RWTEXTURE3D = 44, + SVT_RWBUFFER = 45, + SVT_BYTEADDRESS_BUFFER = 46, + SVT_RWBYTEADDRESS_BUFFER = 47, + SVT_STRUCTURED_BUFFER = 48, + SVT_RWSTRUCTURED_BUFFER = 49, + SVT_APPEND_STRUCTURED_BUFFER = 50, + SVT_CONSUME_STRUCTURED_BUFFER = 51, + + + // Only used as a marker when analyzing register types + SVT_FORCED_INT = 152, + // Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis + SVT_INT_AMBIGUOUS = 153, + + // Partial precision types. 
Used when doing type analysis + SVT_FLOAT10 = 53, // Seems to be used in constant buffers + SVT_FLOAT16 = 54, + SVT_INT16 = 156, + SVT_INT12 = 157, + SVT_UINT16 = 158, + + SVT_FORCE_DWORD = 0x7fffffff } SHADER_VARIABLE_TYPE; -typedef enum _SHADER_VARIABLE_CLASS { - SVC_SCALAR = 0, - SVC_VECTOR = (SVC_SCALAR + 1), - SVC_MATRIX_ROWS = (SVC_VECTOR + 1), - SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1), - SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1), - SVC_STRUCT = (SVC_OBJECT + 1), - SVC_INTERFACE_CLASS = (SVC_STRUCT + 1), - SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1), - SVC_FORCE_DWORD = 0x7fffffff +typedef enum _SHADER_VARIABLE_CLASS +{ + SVC_SCALAR = 0, + SVC_VECTOR = (SVC_SCALAR + 1), + SVC_MATRIX_ROWS = (SVC_VECTOR + 1), + SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1), + SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1), + SVC_STRUCT = (SVC_OBJECT + 1), + SVC_INTERFACE_CLASS = (SVC_STRUCT + 1), + SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1), + SVC_FORCE_DWORD = 0x7fffffff } SHADER_VARIABLE_CLASS; - /////////////////////////////////////// // Types enum TESSELLATOR_PARTITIONING { - TESSELLATOR_PARTITIONING_UNDEFINED = 0, - TESSELLATOR_PARTITIONING_INTEGER = 1, - TESSELLATOR_PARTITIONING_POW2 = 2, - TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3, - TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4 + TESSELLATOR_PARTITIONING_UNDEFINED = 0, + TESSELLATOR_PARTITIONING_INTEGER = 1, + TESSELLATOR_PARTITIONING_POW2 = 2, + TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3, + TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4 }; enum TESSELLATOR_OUTPUT_PRIMITIVE { - TESSELLATOR_OUTPUT_UNDEFINED = 0, - TESSELLATOR_OUTPUT_POINT = 1, - TESSELLATOR_OUTPUT_LINE = 2, - TESSELLATOR_OUTPUT_TRIANGLE_CW = 3, - TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4 + TESSELLATOR_OUTPUT_UNDEFINED = 0, + TESSELLATOR_OUTPUT_POINT = 1, + TESSELLATOR_OUTPUT_LINE = 2, + TESSELLATOR_OUTPUT_TRIANGLE_CW = 3, + TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4 }; typedef enum TESSELLATOR_DOMAIN { - TESSELLATOR_DOMAIN_UNDEFINED = 0, - TESSELLATOR_DOMAIN_ISOLINE 
= 1, - TESSELLATOR_DOMAIN_TRI = 2, - TESSELLATOR_DOMAIN_QUAD = 3 + TESSELLATOR_DOMAIN_UNDEFINED = 0, + TESSELLATOR_DOMAIN_ISOLINE = 1, + TESSELLATOR_DOMAIN_TRI = 2, + TESSELLATOR_DOMAIN_QUAD = 3 } TESSELLATOR_DOMAIN; enum SPECIAL_NAME { - NAME_UNDEFINED = 0, - NAME_POSITION = 1, - NAME_CLIP_DISTANCE = 2, - NAME_CULL_DISTANCE = 3, - NAME_RENDER_TARGET_ARRAY_INDEX = 4, - NAME_VIEWPORT_ARRAY_INDEX = 5, - NAME_VERTEX_ID = 6, - NAME_PRIMITIVE_ID = 7, - NAME_INSTANCE_ID = 8, - NAME_IS_FRONT_FACE = 9, - NAME_SAMPLE_INDEX = 10, - // The following are added for D3D11 - NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, - NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, - NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, - NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, - NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, - NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, - NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, - NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, - NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, - NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, - NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21, - NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22, + NAME_UNDEFINED = 0, + NAME_POSITION = 1, + NAME_CLIP_DISTANCE = 2, + NAME_CULL_DISTANCE = 3, + NAME_RENDER_TARGET_ARRAY_INDEX = 4, + NAME_VIEWPORT_ARRAY_INDEX = 5, + NAME_VERTEX_ID = 6, + NAME_PRIMITIVE_ID = 7, + NAME_INSTANCE_ID = 8, + NAME_IS_FRONT_FACE = 9, + NAME_SAMPLE_INDEX = 10, + // The following are added for D3D11 + NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, + NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, + NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, + NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, + NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, + NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, + NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, + NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, + NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, + NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, + NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21, + NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22, }; -enum 
INOUT_COMPONENT_TYPE { - INOUT_COMPONENT_UNKNOWN = 0, - INOUT_COMPONENT_UINT32 = 1, - INOUT_COMPONENT_SINT32 = 2, - INOUT_COMPONENT_FLOAT32 = 3 +enum INOUT_COMPONENT_TYPE +{ + INOUT_COMPONENT_UNKNOWN = 0, + INOUT_COMPONENT_UINT32 = 1, + INOUT_COMPONENT_SINT32 = 2, + INOUT_COMPONENT_FLOAT32 = 3 }; -enum MIN_PRECISION { - MIN_PRECISION_DEFAULT = 0, - MIN_PRECISION_FLOAT_16 = 1, - MIN_PRECISION_FLOAT_2_8 = 2, - MIN_PRECISION_RESERVED = 3, - MIN_PRECISION_SINT_16 = 4, - MIN_PRECISION_UINT_16 = 5, - MIN_PRECISION_ANY_16 = 0xf0, - MIN_PRECISION_ANY_10 = 0xf1 +enum MIN_PRECISION +{ + MIN_PRECISION_DEFAULT = 0, + MIN_PRECISION_FLOAT_16 = 1, + MIN_PRECISION_FLOAT_2_8 = 2, + MIN_PRECISION_RESERVED = 3, + MIN_PRECISION_SINT_16 = 4, + MIN_PRECISION_UINT_16 = 5, + MIN_PRECISION_ANY_16 = 0xf0, + MIN_PRECISION_ANY_10 = 0xf1 }; enum ResourceType { - RTYPE_CBUFFER,//0 - RTYPE_TBUFFER,//1 - RTYPE_TEXTURE,//2 - RTYPE_SAMPLER,//3 - RTYPE_UAV_RWTYPED,//4 - RTYPE_STRUCTURED,//5 - RTYPE_UAV_RWSTRUCTURED,//6 - RTYPE_BYTEADDRESS,//7 - RTYPE_UAV_RWBYTEADDRESS,//8 - RTYPE_UAV_APPEND_STRUCTURED,//9 - RTYPE_UAV_CONSUME_STRUCTURED,//10 - RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11 - RTYPE_COUNT, + RTYPE_CBUFFER,//0 + RTYPE_TBUFFER,//1 + RTYPE_TEXTURE,//2 + RTYPE_SAMPLER,//3 + RTYPE_UAV_RWTYPED,//4 + RTYPE_STRUCTURED,//5 + RTYPE_UAV_RWSTRUCTURED,//6 + RTYPE_BYTEADDRESS,//7 + RTYPE_UAV_RWBYTEADDRESS,//8 + RTYPE_UAV_APPEND_STRUCTURED,//9 + RTYPE_UAV_CONSUME_STRUCTURED,//10 + RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11 + RTYPE_COUNT, }; -enum ResourceGroup { - RGROUP_CBUFFER, - RGROUP_TEXTURE, - RGROUP_SAMPLER, - RGROUP_UAV, - RGROUP_COUNT, +enum ResourceGroup +{ + RGROUP_CBUFFER, + RGROUP_TEXTURE, + RGROUP_SAMPLER, + RGROUP_UAV, + RGROUP_COUNT, }; enum REFLECT_RESOURCE_DIMENSION { - REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0, - REFLECT_RESOURCE_DIMENSION_BUFFER = 1, - REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2, - REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3, - REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4, - 
REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5, - REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6, - REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7, - REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8, - REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9, - REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, - REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11, + REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0, + REFLECT_RESOURCE_DIMENSION_BUFFER = 1, + REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2, + REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3, + REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7, + REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8, + REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9, + REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, + REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11, }; enum REFLECT_RESOURCE_PRECISION { - REFLECT_RESOURCE_PRECISION_UNKNOWN = 0, - REFLECT_RESOURCE_PRECISION_LOWP = 1, - REFLECT_RESOURCE_PRECISION_MEDIUMP = 2, - REFLECT_RESOURCE_PRECISION_HIGHP = 3, - + REFLECT_RESOURCE_PRECISION_UNKNOWN = 0, + REFLECT_RESOURCE_PRECISION_LOWP = 1, + REFLECT_RESOURCE_PRECISION_MEDIUMP = 2, + REFLECT_RESOURCE_PRECISION_HIGHP = 3, }; enum RESOURCE_RETURN_TYPE { - RETURN_TYPE_UNORM = 1, - RETURN_TYPE_SNORM = 2, - RETURN_TYPE_SINT = 3, - RETURN_TYPE_UINT = 4, - RETURN_TYPE_FLOAT = 5, - RETURN_TYPE_MIXED = 6, - RETURN_TYPE_DOUBLE = 7, - RETURN_TYPE_CONTINUED = 8, - RETURN_TYPE_UNUSED = 9, + RETURN_TYPE_UNORM = 1, + RETURN_TYPE_SNORM = 2, + RETURN_TYPE_SINT = 3, + RETURN_TYPE_UINT = 4, + RETURN_TYPE_FLOAT = 5, + RETURN_TYPE_MIXED = 6, + RETURN_TYPE_DOUBLE = 7, + RETURN_TYPE_CONTINUED = 8, + RETURN_TYPE_UNUSED = 9, }; typedef std::map HLSLccSamplerPrecisionInfo; struct ResourceBinding { - std::string name; - ResourceType eType; - uint32_t ui32BindPoint; - uint32_t ui32BindCount; - uint32_t ui32Flags; - REFLECT_RESOURCE_DIMENSION eDimension; - RESOURCE_RETURN_TYPE 
ui32ReturnType; - uint32_t ui32NumSamples; - REFLECT_RESOURCE_PRECISION ePrecision; - int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with - - SHADER_VARIABLE_TYPE GetDataType() const - { - switch (ePrecision) - { - case REFLECT_RESOURCE_PRECISION_LOWP: - switch (ui32ReturnType) - { - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - case RETURN_TYPE_FLOAT: - return SVT_FLOAT10; - case RETURN_TYPE_SINT: - return SVT_INT16; - case RETURN_TYPE_UINT: - return SVT_UINT16; - default: -// ASSERT(0); - return SVT_FLOAT10; - } - - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - switch (ui32ReturnType) - { - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - case RETURN_TYPE_FLOAT: - return SVT_FLOAT16; - case RETURN_TYPE_SINT: - return SVT_INT16; - case RETURN_TYPE_UINT: - return SVT_UINT16; - default: -// ASSERT(0); - return SVT_FLOAT16; - } - - default: - switch (ui32ReturnType) - { - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - case RETURN_TYPE_FLOAT: - return SVT_FLOAT; - case RETURN_TYPE_SINT: - return SVT_INT; - case RETURN_TYPE_UINT: - return SVT_UINT; - case RETURN_TYPE_DOUBLE: - return SVT_DOUBLE; - default: -// ASSERT(0); - return SVT_FLOAT; - } - } - } + std::string name; + ResourceType eType; + uint32_t ui32BindPoint; + uint32_t ui32BindCount; + uint32_t ui32Flags; + uint32_t ui32Space; + uint32_t ui32RangeID; + REFLECT_RESOURCE_DIMENSION eDimension; + RESOURCE_RETURN_TYPE ui32ReturnType; + uint32_t ui32NumSamples; + REFLECT_RESOURCE_PRECISION ePrecision; + int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with + + SHADER_VARIABLE_TYPE GetDataType() const + { + switch (ePrecision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT10; + case RETURN_TYPE_SINT: + return SVT_INT16; + case RETURN_TYPE_UINT: + return SVT_UINT16; + 
default: +// ASSERT(0); + return SVT_FLOAT10; + } + + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT16; + case RETURN_TYPE_SINT: + return SVT_INT16; + case RETURN_TYPE_UINT: + return SVT_UINT16; + default: +// ASSERT(0); + return SVT_FLOAT16; + } + + default: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT; + case RETURN_TYPE_SINT: + return SVT_INT; + case RETURN_TYPE_UINT: + return SVT_UINT; + case RETURN_TYPE_DOUBLE: + return SVT_DOUBLE; + default: +// ASSERT(0); + return SVT_FLOAT; + } + } + } }; struct ShaderVarType { - ShaderVarType() : - Class(), - Type(), - Rows(), - Columns(), - Elements(), - MemberCount(), - Offset(), - ParentCount(), - Parent(), - m_IsUsed(false) - {} - - SHADER_VARIABLE_CLASS Class; - SHADER_VARIABLE_TYPE Type; - uint32_t Rows; - uint32_t Columns; - uint32_t Elements; - uint32_t MemberCount; - uint32_t Offset; - std::string name; - - uint32_t ParentCount; - struct ShaderVarType * Parent; - //Includes all parent names. - std::string fullName; - - std::vector Members; - - bool m_IsUsed; // If not set, is not used in the shader code - - uint32_t GetMemberCount() const - { - if (Class == SVC_STRUCT) - { - uint32_t res = 0; - std::vector::const_iterator itr; - for (itr = Members.begin(); itr != Members.end(); itr++) - { - res += itr->GetMemberCount(); - } - return res; - } - else - return 1; - } - + ShaderVarType() : + Class(), + Type(), + Rows(), + Columns(), + Elements(), + MemberCount(), + Offset(), + ParentCount(), + Parent(), + m_IsUsed(false) + {} + + SHADER_VARIABLE_CLASS Class; + SHADER_VARIABLE_TYPE Type; + uint32_t Rows; + uint32_t Columns; + uint32_t Elements; + uint32_t MemberCount; + uint32_t Offset; + std::string name; + + uint32_t ParentCount; + struct ShaderVarType * Parent; + //Includes all parent names. 
+ std::string fullName; + + std::vector Members; + + bool m_IsUsed; // If not set, is not used in the shader code + + uint32_t GetMemberCount() const + { + if (Class == SVC_STRUCT) + { + uint32_t res = 0; + std::vector::const_iterator itr; + for (itr = Members.begin(); itr != Members.end(); itr++) + { + res += itr->GetMemberCount(); + } + return res; + } + else + return 1; + } }; struct ShaderVar { - std::string name; - int haveDefaultValue; - std::vector pui32DefaultValues; - //Offset/Size in bytes. - uint32_t ui32StartOffset; - uint32_t ui32Size; - - ShaderVarType sType; + std::string name; + int haveDefaultValue; + std::vector pui32DefaultValues; + //Offset/Size in bytes. + uint32_t ui32StartOffset; + uint32_t ui32Size; + + ShaderVarType sType; }; struct ConstantBuffer { - std::string name; - - std::vector asVars; - - uint32_t ui32TotalSizeInBytes; - - uint32_t GetMemberCount(bool stripUnused) const - { - uint32_t res = 0; - std::vector::const_iterator itr; - for (itr = asVars.begin(); itr != asVars.end(); itr++) - { - if(stripUnused && !itr->sType.m_IsUsed) - continue; - res += itr->sType.GetMemberCount(); - } - return res; - } + std::string name; + + std::vector asVars; + + uint32_t ui32TotalSizeInBytes; + + uint32_t GetMemberCount(bool stripUnused) const + { + uint32_t res = 0; + std::vector::const_iterator itr; + for (itr = asVars.begin(); itr != asVars.end(); itr++) + { + if (stripUnused && !itr->sType.m_IsUsed) + continue; + res += itr->sType.GetMemberCount(); + } + return res; + } }; struct ClassType { - std::string name; - uint16_t ui16ID; - uint16_t ui16ConstBufStride; - uint16_t ui16Texture; - uint16_t ui16Sampler; + std::string name; + uint16_t ui16ID; + uint16_t ui16ConstBufStride; + uint16_t ui16Texture; + uint16_t ui16Sampler; }; struct ClassInstance { - std::string name; - uint16_t ui16ID; - uint16_t ui16ConstBuf; - uint16_t ui16ConstBufOffset; - uint16_t ui16Texture; - uint16_t ui16Sampler; + std::string name; + uint16_t ui16ID; + uint16_t 
ui16ConstBuf; + uint16_t ui16ConstBufOffset; + uint16_t ui16Texture; + uint16_t ui16Sampler; }; class Operand; @@ -410,100 +413,98 @@ class ShaderInfo { public: - struct InOutSignature - { - std::string semanticName; - uint32_t ui32SemanticIndex; - SPECIAL_NAME eSystemValueType; - INOUT_COMPONENT_TYPE eComponentType; - uint32_t ui32Register; - uint32_t ui32Mask; - uint32_t ui32ReadWriteMask; + struct InOutSignature + { + std::string semanticName; + uint32_t ui32SemanticIndex; + SPECIAL_NAME eSystemValueType; + INOUT_COMPONENT_TYPE eComponentType; + uint32_t ui32Register; + uint32_t ui32Mask; + uint32_t ui32ReadWriteMask; - int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle + int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle - uint32_t ui32Stream; - MIN_PRECISION eMinPrec; + uint32_t ui32Stream; + MIN_PRECISION eMinPrec; - std::set isIndexed; // Set of phases where this input/output is part of a index range. - std::map indexStart; // If indexed, contains the start index for the range - std::map index; // If indexed, contains the current index relative to the index start. + std::set isIndexed; // Set of phases where this input/output is part of a index range. + std::map indexStart; // If indexed, contains the start index for the range + std::map index; // If indexed, contains the current index relative to the index start. 
+ }; - }; + ShaderInfo() : + ui32MajorVersion(), + ui32MinorVersion(), + psResourceBindings(), + psConstantBuffers(), + psThisPointerConstBuffer(), + psClassTypes(), + psClassInstances() + {} - ShaderInfo() : - ui32MajorVersion(), - ui32MinorVersion(), - psResourceBindings(), - psConstantBuffers(), - psThisPointerConstBuffer(), - psClassTypes(), - psClassInstances() - {} + SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo); - SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo); + int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const; - int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const; + void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const; - void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const; + int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const; - int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const; + int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; + int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; + int GetOutputSignatureFromRegister(const uint32_t ui32Register, + const uint32_t ui32CompMask, + const uint32_t ui32Stream, + const InOutSignature** ppsOut, + bool allowNull = false) const; - int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; - int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const 
InOutSignature** ppsOut, bool allowNull = false) const; - int GetOutputSignatureFromRegister(const uint32_t ui32Register, - const uint32_t ui32CompMask, - const uint32_t ui32Stream, - const InOutSignature** ppsOut, - bool allowNull = false) const; + int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const; - int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const; - - static ResourceGroup ResourceTypeToResourceGroup(ResourceType); + static ResourceGroup ResourceTypeToResourceGroup(ResourceType); static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false); - static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, - const uint32_t (&pui32Swizzle)[4], - const ConstantBuffer* psCBuf, - const ShaderVarType** ppsShaderVar, - bool* isArray, - std::vector* arrayIndices, - int32_t* pi32Rebase, - uint32_t flags); + static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, + const uint32_t(&pui32Swizzle)[4], + const ConstantBuffer* psCBuf, + const ShaderVarType** ppsShaderVar, + bool* isArray, + std::vector* arrayIndices, + int32_t* pi32Rebase, + uint32_t flags); - static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); + static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); - // Apply shader precision information to resource bindings - void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); + // Apply shader precision information to resource bindings + void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); - uint32_t ui32MajorVersion; - uint32_t ui32MinorVersion; + uint32_t 
ui32MajorVersion; + uint32_t ui32MinorVersion; - std::vector psInputSignatures; - std::vector psOutputSignatures; - std::vector psPatchConstantSignatures; + std::vector psInputSignatures; + std::vector psOutputSignatures; + std::vector psPatchConstantSignatures; - std::vector psResourceBindings; + std::vector psResourceBindings; - std::vector psConstantBuffers; - ConstantBuffer* psThisPointerConstBuffer; + std::vector psConstantBuffers; + ConstantBuffer* psThisPointerConstBuffer; - std::vector psClassTypes; - std::vector psClassInstances; + std::vector psClassTypes; + std::vector psClassInstances; - //Func table ID to class name ID. - HLSLcc::growing_vector aui32TableIDToTypeID; + //Func table ID to class name ID. + HLSLcc::growing_vector aui32TableIDToTypeID; - HLSLcc::growing_vector aui32ResourceMap[RGROUP_COUNT]; + HLSLcc::growing_vector aui32ResourceMap[RGROUP_COUNT]; - HLSLcc::growing_vector sGroupSharedVarType; + HLSLcc::growing_vector sGroupSharedVarType; - TESSELLATOR_PARTITIONING eTessPartitioning; - TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; - uint32_t ui32TessInputControlPointCount; - uint32_t ui32TessOutputControlPointCount; - TESSELLATOR_DOMAIN eTessDomain; - bool bEarlyFragmentTests; + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + uint32_t ui32TessInputControlPointCount; + uint32_t ui32TessOutputControlPointCount; + TESSELLATOR_DOMAIN eTessDomain; + bool bEarlyFragmentTests; }; - diff --git a/include/growing_array.h b/include/growing_array.h index 199d04a..d558665 100644 --- a/include/growing_array.h +++ b/include/growing_array.h @@ -2,46 +2,44 @@ namespace HLSLcc { - // A vector that automatically grows when written to, fills the intermediate ones with default value. - // Reading from an index returns the default value if attempting to access out of bounds. 
- template class growing_vector - { - public: - growing_vector() : data() {} - - std::vector data; - - T & operator[](std::size_t idx) - { - if (idx >= data.size()) - data.resize((idx + 1) * 2); - return data[idx]; - } - - const T & operator[](std::size_t idx) const - { - static T defaultValue = T(); - if (idx >= data.size()) - return defaultValue; - return data[idx]; - } - - }; - - // Same but with bool specialization - template <> class growing_vector - { - public: - growing_vector() : data() {} - - std::vector data; - - std::vector::reference operator[](std::size_t idx) - { - if (idx >= data.size()) - data.resize((idx + 1) * 2, false); - return data[idx]; - } - - }; -}; + // A vector that automatically grows when written to, fills the intermediate ones with default value. + // Reading from an index returns the default value if attempting to access out of bounds. + template class growing_vector + { + public: + growing_vector() : data() {} + + std::vector data; + + T & operator[](std::size_t idx) + { + if (idx >= data.size()) + data.resize((idx + 1) * 2); + return data[idx]; + } + + const T & operator[](std::size_t idx) const + { + static T defaultValue = T(); + if (idx >= data.size()) + return defaultValue; + return data[idx]; + } + }; + + // Same but with bool specialization + template<> class growing_vector + { + public: + growing_vector() : data() {} + + std::vector data; + + std::vector::reference operator[](std::size_t idx) + { + if (idx >= data.size()) + data.resize((idx + 1) * 2, false); + return data[idx]; + } + }; +} diff --git a/include/hlslcc.h b/include/hlslcc.h index ce32214..a9225e2 100644 --- a/include/hlslcc.h +++ b/include/hlslcc.h @@ -6,7 +6,7 @@ #include #include -#if defined (_WIN32) && defined(HLSLCC_DYNLIB) +#if defined(_WIN32) && defined(HLSLCC_DYNLIB) #define HLSLCC_APIENTRY __stdcall #if defined(libHLSLcc_EXPORTS) #define HLSLCC_API __declspec(dllexport) @@ -24,9 +24,9 @@ typedef enum { LANG_DEFAULT,// Depends on the HLSL shader model. 
- LANG_ES_100, LANG_ES_FIRST=LANG_ES_100, + LANG_ES_100, LANG_ES_FIRST = LANG_ES_100, LANG_ES_300, - LANG_ES_310, LANG_ES_LAST = LANG_ES_310, + LANG_ES_310, LANG_ES_LAST = LANG_ES_310, LANG_120, LANG_GL_FIRST = LANG_120, LANG_130, LANG_140, @@ -37,15 +37,16 @@ typedef enum LANG_420, LANG_430, LANG_440, LANG_GL_LAST = LANG_440, - LANG_METAL, + LANG_METAL, } GLLang; -typedef struct GlExtensions { - uint32_t ARB_explicit_attrib_location : 1; - uint32_t ARB_explicit_uniform_location : 1; - uint32_t ARB_shading_language_420pack : 1; - uint32_t OVR_multiview : 1; - uint32_t EXT_shader_framebuffer_fetch : 1; +typedef struct GlExtensions +{ + uint32_t ARB_explicit_attrib_location : 1; + uint32_t ARB_explicit_uniform_location : 1; + uint32_t ARB_shading_language_420pack : 1; + uint32_t OVR_multiview : 1; + uint32_t EXT_shader_framebuffer_fetch : 1; } GlExtensions; #include "ShaderInfo.h" @@ -65,11 +66,11 @@ typedef enum INTERPOLATION_MODE INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7, } INTERPOLATION_MODE; -#define PS_FLAG_VERTEX_SHADER 0x1 -#define PS_FLAG_HULL_SHADER 0x2 -#define PS_FLAG_DOMAIN_SHADER 0x4 +#define PS_FLAG_VERTEX_SHADER 0x1 +#define PS_FLAG_HULL_SHADER 0x2 +#define PS_FLAG_DOMAIN_SHADER 0x4 #define PS_FLAG_GEOMETRY_SHADER 0x8 -#define PS_FLAG_PIXEL_SHADER 0x10 +#define PS_FLAG_PIXEL_SHADER 0x10 #define TO_FLAG_NONE 0x0 #define TO_FLAG_INTEGER 0x1 @@ -99,42 +100,42 @@ typedef enum INTERPOLATION_MODE typedef enum { - INVALID_SHADER = -1, - PIXEL_SHADER, - VERTEX_SHADER, - GEOMETRY_SHADER, - HULL_SHADER, - DOMAIN_SHADER, - COMPUTE_SHADER, + INVALID_SHADER = -1, + PIXEL_SHADER, + VERTEX_SHADER, + GEOMETRY_SHADER, + HULL_SHADER, + DOMAIN_SHADER, + COMPUTE_SHADER, } SHADER_TYPE; -// Enum for texture dimension reflection data +// Enum for texture dimension reflection data typedef enum { - TD_FLOAT = 0, - TD_INT, - TD_2D, - TD_3D, - TD_CUBE, - TD_2DSHADOW, - TD_2DARRAY, - TD_CUBEARRAY + TD_FLOAT = 0, + TD_INT, + TD_2D, + TD_3D, + TD_CUBE, + TD_2DSHADOW, + 
TD_2DARRAY, + TD_CUBEARRAY } HLSLCC_TEX_DIMENSION; // The prefix for all temporary variables used by the generated code. // Using a texture or uniform name like this will cause conflicts #define HLSLCC_TEMP_PREFIX "u_xlat" -typedef std::vector> MemberDefinitions; +typedef std::vector > MemberDefinitions; // We store struct definition contents inside a vector of strings struct StructDefinition { - StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} + StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} - MemberDefinitions m_Members; // A vector of strings with the struct members - std::vector m_Dependencies; // A vector of struct names this struct depends on. - bool m_IsPrinted; // Has this struct been printed out yet? + MemberDefinitions m_Members; // A vector of strings with the struct members + std::vector m_Dependencies; // A vector of struct names this struct depends on. + bool m_IsPrinted; // Has this struct been printed out yet? }; typedef std::map StructDefinitions; @@ -146,65 +147,65 @@ typedef std::map FunctionDefinitions; // (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc) class BindingSlotAllocator { - typedef std::map SlotMap; - SlotMap m_Allocations; - uint32_t m_ShaderStageAllocations; + typedef std::map SlotMap; + SlotMap m_Allocations; + uint32_t m_ShaderStageAllocations; public: - BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0) - { - for(int i = MAX_RESOURCE_BINDINGS-1; i >= 0; i --) - m_FreeSlots.push_back(i); - } - - enum BindType - { - ConstantBuffer = 0, - RWBuffer, - Texture, - UAV - }; - - uint32_t GetBindingSlot(uint32_t regNo, BindType type) - { - // The key is regNumber with the bindtype stored to highest 16 bits - uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16); - SlotMap::iterator itr = m_Allocations.find(key); - if(itr == m_Allocations.end()) - { - uint32_t slot = m_FreeSlots.back(); - 
m_FreeSlots.pop_back(); - m_Allocations.insert(std::make_pair(key, slot)); - return slot; - } - return itr->second; - } - - // Func for reserving binding slots with the original reg number. - // Used for fragment shader UAVs (SetRandomWriteTarget etc). - void ReserveBindingSlot(uint32_t regNo, BindType type) - { - uint32_t key = regNo | (uint32_t(type) << 16); - m_Allocations.insert(std::make_pair(key, regNo)); - - // Remove regNo from free slots - for (int i = m_FreeSlots.size() - 1; i >= 0; i--) - { - if (m_FreeSlots[i] == regNo) - { - m_FreeSlots.erase(m_FreeSlots.begin() + i); - return; - } - } - } - - uint32_t SaveTotalShaderStageAllocationsCount() - { - m_ShaderStageAllocations = m_Allocations.size(); - return m_ShaderStageAllocations; - } + BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0) + { + for (int i = MAX_RESOURCE_BINDINGS - 1; i >= 0; i--) + m_FreeSlots.push_back(i); + } + + enum BindType + { + ConstantBuffer = 0, + RWBuffer, + Texture, + UAV + }; + + uint32_t GetBindingSlot(uint32_t regNo, BindType type) + { + // The key is regNumber with the bindtype stored to highest 16 bits + uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16); + SlotMap::iterator itr = m_Allocations.find(key); + if (itr == m_Allocations.end()) + { + uint32_t slot = m_FreeSlots.back(); + m_FreeSlots.pop_back(); + m_Allocations.insert(std::make_pair(key, slot)); + return slot; + } + return itr->second; + } + + // Func for reserving binding slots with the original reg number. + // Used for fragment shader UAVs (SetRandomWriteTarget etc). 
+ void ReserveBindingSlot(uint32_t regNo, BindType type) + { + uint32_t key = regNo | (uint32_t(type) << 16); + m_Allocations.insert(std::make_pair(key, regNo)); + + // Remove regNo from free slots + for (int i = m_FreeSlots.size() - 1; i >= 0; i--) + { + if (m_FreeSlots[i] == regNo) + { + m_FreeSlots.erase(m_FreeSlots.begin() + i); + return; + } + } + } + + uint32_t SaveTotalShaderStageAllocationsCount() + { + m_ShaderStageAllocations = m_Allocations.size(); + return m_ShaderStageAllocations; + } private: - std::vector m_FreeSlots; + std::vector m_FreeSlots; }; //The shader stages (Vertex, Pixel et al) do not depend on each other @@ -220,156 +221,155 @@ class BindingSlotAllocator class GLSLCrossDependencyData { public: - // A container for a single Vulkan resource binding ( pair) - typedef std::pair VulkanResourceBinding; + // A container for a single Vulkan resource binding ( pair) + typedef std::pair VulkanResourceBinding; private: - //Required if PixelInterpDependency is true - std::vector pixelInterpolation; - - // Map of varying locations, indexed by varying names. - typedef std::map VaryingLocations; - - static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output) - - VaryingLocations varyingLocationsMap[MAX_NAMESPACES]; - uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES]; - - typedef std::map VulkanResourceBindings; - VulkanResourceBindings m_VulkanResourceBindings; - uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set. - - inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) - { - switch (eShaderType) - { - case VERTEX_SHADER: - return isInput ? 0 : 1; - - case HULL_SHADER: - return isInput ? 1 : 2; - - case DOMAIN_SHADER: - return isInput ? 2 : 3; - - case GEOMETRY_SHADER: - // The input depends on whether there's a tessellation shader before us - if (isInput) - { - return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 
3 : 1; - } - return 4; - - case PIXEL_SHADER: - // The inputs can come from geom shader, domain shader or directly from vertex shader - if (isInput) - { - if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) - { - return 4; - } - else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) - { - return 3; - } - else - { - return 1; - } - } - return 5; // This value never really used - default: - return 0; - } - } - - typedef std::map SpecializationConstantMap; - SpecializationConstantMap m_SpecConstantMap; - uint32_t m_NextSpecID; + //Required if PixelInterpDependency is true + std::vector pixelInterpolation; + + // Map of varying locations, indexed by varying names. + typedef std::map VaryingLocations; + + static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output) + + VaryingLocations varyingLocationsMap[MAX_NAMESPACES]; + uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES]; + + typedef std::map VulkanResourceBindings; + VulkanResourceBindings m_VulkanResourceBindings; + uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set. + + inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) + { + switch (eShaderType) + { + case VERTEX_SHADER: + return isInput ? 0 : 1; + + case HULL_SHADER: + return isInput ? 1 : 2; + + case DOMAIN_SHADER: + return isInput ? 2 : 3; + + case GEOMETRY_SHADER: + // The input depends on whether there's a tessellation shader before us + if (isInput) + { + return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 
3 : 1; + } + return 4; + + case PIXEL_SHADER: + // The inputs can come from geom shader, domain shader or directly from vertex shader + if (isInput) + { + if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) + { + return 4; + } + else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) + { + return 3; + } + else + { + return 1; + } + } + return 5; // This value never really used + default: + return 0; + } + } + + typedef std::map SpecializationConstantMap; + SpecializationConstantMap m_SpecConstantMap; + uint32_t m_NextSpecID; public: - GLSLCrossDependencyData() - : eTessPartitioning(), - eTessOutPrim(), - fMaxTessFactor(64.0), - numPatchesInThreadGroup(0), - hasControlPoint(false), - hasPatchConstant(false), - ui32ProgramStages(0), - m_ExtBlendModes(), - m_NextSpecID(0) - { - memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation)); - memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding)); - } - - - // Retrieve the location for a varying with a given name. - // If the name doesn't already have an allocated location, allocate one - // and store it into the map. 
- inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput) - { - int nspace = GetVaryingNamespace(eShaderType, isInput); - VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name); - if (itr != varyingLocationsMap[nspace].end()) - return itr->second; - - uint32_t newKey = nextAvailableVaryingLocation[nspace]; - nextAvailableVaryingLocation[nspace]++; - varyingLocationsMap[nspace].insert(std::make_pair(name, newKey)); - return newKey; - } - - // Retrieve the binding for a resource (texture, constant buffer, image) with a given name - // If not found, allocate a new one (in set 0) and return that - // The returned value is a pair of - // If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name) - // will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified - // if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter' - inline std::pair GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) - { - // scan for the special marker - const char *marker = "Xhlslcc_set_%d_bind_%dX"; - uint32_t Set = 0, Binding = 0; - size_t startLoc = name.find("Xhlslcc"); - if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2)) - { - // Get rid of all markers - while ((startLoc = name.find("Xhlslcc")) != std::string::npos) - { - size_t endLoc = name.find('X', startLoc + 1); - if (endLoc == std::string::npos) - break; - name.erase(startLoc, endLoc - startLoc + 1); - } - // Add to map - VulkanResourceBinding newBind = std::make_pair(Set, Binding); - m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); - if (allocRoomForCounter) - { - VulkanResourceBinding counterBind = std::make_pair(Set, Binding+1); - m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", 
counterBind)); - } - - return newBind; - } - - VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name); - if (itr != m_VulkanResourceBindings.end()) - return itr->second; - - // Allocate a new one - VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); - m_NextAvailableVulkanResourceBinding[preferredSet]++; - m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); - if (allocRoomForCounter) - { - VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); - m_NextAvailableVulkanResourceBinding[preferredSet]++; - m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); - } - return newBind; - } + GLSLCrossDependencyData() + : eTessPartitioning(), + eTessOutPrim(), + fMaxTessFactor(64.0), + numPatchesInThreadGroup(0), + hasControlPoint(false), + hasPatchConstant(false), + ui32ProgramStages(0), + m_ExtBlendModes(), + m_NextSpecID(0) + { + memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation)); + memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding)); + } + + // Retrieve the location for a varying with a given name. + // If the name doesn't already have an allocated location, allocate one + // and store it into the map. 
+ inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput) + { + int nspace = GetVaryingNamespace(eShaderType, isInput); + VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name); + if (itr != varyingLocationsMap[nspace].end()) + return itr->second; + + uint32_t newKey = nextAvailableVaryingLocation[nspace]; + nextAvailableVaryingLocation[nspace]++; + varyingLocationsMap[nspace].insert(std::make_pair(name, newKey)); + return newKey; + } + + // Retrieve the binding for a resource (texture, constant buffer, image) with a given name + // If not found, allocate a new one (in set 0) and return that + // The returned value is a pair of + // If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name) + // will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified + // if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter' + inline std::pair GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) + { + // scan for the special marker + const char *marker = "Xhlslcc_set_%d_bind_%dX"; + uint32_t Set = 0, Binding = 0; + size_t startLoc = name.find("Xhlslcc"); + if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2)) + { + // Get rid of all markers + while ((startLoc = name.find("Xhlslcc")) != std::string::npos) + { + size_t endLoc = name.find('X', startLoc + 1); + if (endLoc == std::string::npos) + break; + name.erase(startLoc, endLoc - startLoc + 1); + } + // Add to map + VulkanResourceBinding newBind = std::make_pair(Set, Binding); + m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); + if (allocRoomForCounter) + { + VulkanResourceBinding counterBind = std::make_pair(Set, Binding + 1); + m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", 
counterBind)); + } + + return newBind; + } + + VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name); + if (itr != m_VulkanResourceBindings.end()) + return itr->second; + + // Allocate a new one + VulkanResourceBinding newBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); + m_NextAvailableVulkanResourceBinding[preferredSet]++; + m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); + if (allocRoomForCounter) + { + VulkanResourceBinding counterBind = std::make_pair(preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet]); + m_NextAvailableVulkanResourceBinding[preferredSet]++; + m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); + } + return newBind; + } //dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D, //but they appear on inputs inside domain shaders for GL. @@ -383,74 +383,77 @@ class GLSLCrossDependencyData bool hasControlPoint; bool hasPatchConstant; - // Bitfield for the shader stages this program is going to include (see PS_FLAG_*). - // Needed so we can construct proper shader input and output names - uint32_t ui32ProgramStages; - - std::vector m_ExtBlendModes; // The blend modes (from KHR_blend_equation_advanced) requested for this shader. See ext spec for list. 
- - inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) - { - if (regNo >= pixelInterpolation.size()) - return INTERPOLATION_UNDEFINED; - else - return pixelInterpolation[regNo]; - } - - inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode) - { - if (regNo >= pixelInterpolation.size()) - pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED); - - pixelInterpolation[regNo] = mode; - } - - struct CompareFirst - { - CompareFirst(std::string val) : m_Val (val) {} - bool operator()(const std::pair& elem) const - { - return m_Val == elem.first; - } - private: - std::string m_Val; - }; - - inline bool IsMemberDeclared(const std::string &name) - { - if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end()) - return true; - return false; - } - - MemberDefinitions m_SharedFunctionMembers; - BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots; - BindingSlotAllocator m_SharedBufferSlots; - - inline void ClearCrossDependencyData() - { - pixelInterpolation.clear(); - for (int i = 0; i < MAX_NAMESPACES; i++) - { - varyingLocationsMap[i].clear(); - nextAvailableVaryingLocation[i] = 0; - } - m_NextSpecID = kArraySizeConstantID + 1; - m_SpecConstantMap.clear(); - m_SharedFunctionMembers.clear(); - } - - // Retrieve or allocate a layout slot for Vulkan specialization constant - inline uint32_t GetSpecializationConstantSlot(const std::string &name) - { - SpecializationConstantMap::iterator itr = m_SpecConstantMap.find(name); - if (itr != m_SpecConstantMap.end()) - return itr->second; - - m_SpecConstantMap.insert(std::make_pair(std::string(name), m_NextSpecID)); - - return m_NextSpecID++; - } + // Bitfield for the shader stages this program is going to include (see PS_FLAG_*). 
+ // Needed so we can construct proper shader input and output names + uint32_t ui32ProgramStages; + + std::vector m_ExtBlendModes; // The blend modes (from KHR_blend_equation_advanced) requested for this shader. See ext spec for list. + + inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) + { + if (regNo >= pixelInterpolation.size()) + return INTERPOLATION_UNDEFINED; + else + return pixelInterpolation[regNo]; + } + + inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode) + { + if (regNo >= pixelInterpolation.size()) + pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED); + + pixelInterpolation[regNo] = mode; + } + + struct CompareFirst + { + CompareFirst(std::string val) : m_Val(val) {} + bool operator()(const std::pair& elem) const + { + return m_Val == elem.first; + } + + private: + std::string m_Val; + }; + + inline bool IsMemberDeclared(const std::string &name) + { + if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end()) + return true; + return false; + } + + MemberDefinitions m_SharedFunctionMembers; + std::vector m_SharedDependencies; + BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots; + BindingSlotAllocator m_SharedBufferSlots; + + inline void ClearCrossDependencyData() + { + pixelInterpolation.clear(); + for (int i = 0; i < MAX_NAMESPACES; i++) + { + varyingLocationsMap[i].clear(); + nextAvailableVaryingLocation[i] = 0; + } + m_NextSpecID = kArraySizeConstantID + 1; + m_SpecConstantMap.clear(); + m_SharedFunctionMembers.clear(); + m_SharedDependencies.clear(); + } + + // Retrieve or allocate a layout slot for Vulkan specialization constant + inline uint32_t GetSpecializationConstantSlot(const std::string &name) + { + SpecializationConstantMap::iterator itr = m_SpecConstantMap.find(name); + if (itr != m_SpecConstantMap.end()) + return itr->second; + + m_SpecConstantMap.insert(std::make_pair(std::string(name), 
m_NextSpecID)); + + return m_NextSpecID++; + } }; struct GLSLShader @@ -466,28 +469,28 @@ struct GLSLShader class HLSLccReflection { public: - HLSLccReflection() {} - virtual ~HLSLccReflection() {} + HLSLccReflection() {} + virtual ~HLSLccReflection() {} - // Called on errors or diagnostic messages - virtual void OnDiagnostics(const std::string &error, int line, bool isError) {} + // Called on errors or diagnostic messages + virtual void OnDiagnostics(const std::string &error, int line, bool isError) {} - virtual void OnInputBinding(const std::string &name, int bindIndex) {} + virtual void OnInputBinding(const std::string &name, int bindIndex) {} - // Returns false if this constant buffer is not needed for this shader. This info can be used for pruning unused - // constant buffers and vars from compute shaders where we need broader context than a single kernel to know - // if something can be dropped, as the constant buffers are shared between all kernels in a .compute file. - virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; } + // Returns false if this constant buffer is not needed for this shader. This info can be used for pruning unused + // constant buffers and vars from compute shaders where we need broader context than a single kernel to know + // if something can be dropped, as the constant buffers are shared between all kernels in a .compute file. + virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; } - // Returns false if this constant var is not needed for this shader. See above. - virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize) { return true; } + // Returns false if this constant var is not needed for this shader. See above. 
+ virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize, bool isUsed) { return true; } - virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {} - virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} - virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {} - virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} - virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {} - virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {} + virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {} + virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} + virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {} + virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} + virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {} + virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {} }; @@ -562,7 +565,7 @@ static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000; static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; // If set, avoid emit atomic counter (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead. 
-static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000; +static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000; // If set, and generating Vulkan shaders, attempts to detect static branching and transforms them into specialization constants static const unsigned int HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS = 0x200000; @@ -588,27 +591,26 @@ extern "C" { #endif HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result - ); + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result +); HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result); + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result); #ifdef __cplusplus } #endif #endif - diff --git a/include/hlslcc.hpp b/include/hlslcc.hpp index fa4dd96..67a792a 100644 --- a/include/hlslcc.hpp +++ b/include/hlslcc.hpp @@ -1,5 +1,3 @@ - extern "C" { #include "hlslcc.h" } - diff --git a/include/pstdint.h b/include/pstdint.h index 00fc1fc..f155b23 100644 --- a/include/pstdint.h +++ b/include/pstdint.h @@ -5,11 +5,11 @@ * * Copyright (c) 2005-2011 Paul Hsieh * All rights reserved. 
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright @@ -17,7 +17,7 @@ * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. @@ -191,7 +191,7 @@ * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. */ -#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED) +#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)))) && !defined(_PSTDINT_H_INCLUDED) #include #define _PSTDINT_H_INCLUDED # ifndef PRINTF_INT64_MODIFIER @@ -242,47 +242,47 @@ * these duplicated definitions from Open Watcom's stdint.h file for now. 
*/ -# if defined (__WATCOMC__) && __WATCOMC__ >= 1250 -# if !defined (INT64_C) +# if defined(__WATCOMC__) && __WATCOMC__ >= 1250 +# if !defined(INT64_C) # define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) # endif -# if !defined (UINT64_C) +# if !defined(UINT64_C) # define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) # endif -# if !defined (INT32_C) +# if !defined(INT32_C) # define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) # endif -# if !defined (UINT32_C) +# if !defined(UINT32_C) # define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) # endif -# if !defined (INT16_C) +# if !defined(INT16_C) # define INT16_C(x) (x) # endif -# if !defined (UINT16_C) +# if !defined(UINT16_C) # define UINT16_C(x) (x) # endif -# if !defined (INT8_C) +# if !defined(INT8_C) # define INT8_C(x) (x) # endif -# if !defined (UINT8_C) +# if !defined(UINT8_C) # define UINT8_C(x) (x) # endif -# if !defined (UINT64_MAX) +# if !defined(UINT64_MAX) # define UINT64_MAX 18446744073709551615ULL # endif -# if !defined (INT64_MAX) +# if !defined(INT64_MAX) # define INT64_MAX 9223372036854775807LL # endif -# if !defined (UINT32_MAX) +# if !defined(UINT32_MAX) # define UINT32_MAX 4294967295UL # endif -# if !defined (INT32_MAX) +# if !defined(INT32_MAX) # define INT32_MAX 2147483647L # endif -# if !defined (INTMAX_MAX) +# if !defined(INTMAX_MAX) # define INTMAX_MAX INT64_MAX # endif -# if !defined (INTMAX_MIN) +# if !defined(INTMAX_MIN) # define INTMAX_MIN INT64_MIN # endif # endif @@ -305,8 +305,8 @@ # define UINT8_MAX 0xff #endif #ifndef uint8_t -# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) - typedef unsigned char uint8_t; +# if (UCHAR_MAX == UINT8_MAX) || defined(S_SPLINT_S) +typedef unsigned char uint8_t; # define UINT8_C(v) ((uint8_t) v) # else # error "Platform not supported" @@ -320,8 +320,8 @@ # define INT8_MIN INT8_C(0x80) #endif #ifndef int8_t -# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) - typedef signed char int8_t; +# if (SCHAR_MAX == INT8_MAX) || defined(S_SPLINT_S) +typedef signed 
char int8_t; # define INT8_C(v) ((int8_t) v) # else # error "Platform not supported" @@ -332,14 +332,14 @@ # define UINT16_MAX 0xffff #endif #ifndef uint16_t -#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) - typedef unsigned int uint16_t; +#if (UINT_MAX == UINT16_MAX) || defined(S_SPLINT_S) +typedef unsigned int uint16_t; # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "" # endif # define UINT16_C(v) ((uint16_t) (v)) #elif (USHRT_MAX == UINT16_MAX) - typedef unsigned short uint16_t; +typedef unsigned short uint16_t; # define UINT16_C(v) ((uint16_t) (v)) # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "h" @@ -356,14 +356,14 @@ # define INT16_MIN INT16_C(0x8000) #endif #ifndef int16_t -#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) - typedef signed int int16_t; +#if (INT_MAX == INT16_MAX) || defined(S_SPLINT_S) +typedef signed int int16_t; # define INT16_C(v) ((int16_t) (v)) # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "" # endif #elif (SHRT_MAX == INT16_MAX) - typedef signed short int16_t; +typedef signed short int16_t; # define INT16_C(v) ((int16_t) (v)) # ifndef PRINTF_INT16_MODIFIER # define PRINTF_INT16_MODIFIER "h" @@ -377,20 +377,20 @@ # define UINT32_MAX (0xffffffffUL) #endif #ifndef uint32_t -#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) - typedef unsigned long uint32_t; +#if (ULONG_MAX == UINT32_MAX) || defined(S_SPLINT_S) +typedef unsigned long uint32_t; # define UINT32_C(v) v ## UL # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "l" # endif #elif (UINT_MAX == UINT32_MAX) - typedef unsigned int uint32_t; +typedef unsigned int uint32_t; # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif # define UINT32_C(v) v ## U #elif (USHRT_MAX == UINT32_MAX) - typedef unsigned short uint32_t; +typedef unsigned short uint32_t; # define UINT32_C(v) ((unsigned short) (v)) # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" @@ -407,20 +407,20 @@ # define INT32_MIN 
INT32_C(0x80000000) #endif #ifndef int32_t -#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) - typedef signed long int32_t; +#if (LONG_MAX == INT32_MAX) || defined(S_SPLINT_S) +typedef signed long int32_t; # define INT32_C(v) v ## L # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "l" # endif #elif (INT_MAX == INT32_MAX) - typedef signed int int32_t; +typedef signed int int32_t; # define INT32_C(v) v # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" # endif #elif (SHRT_MAX == INT32_MAX) - typedef signed short int32_t; +typedef signed short int32_t; # define INT32_C(v) ((short) (v)) # ifndef PRINTF_INT32_MODIFIER # define PRINTF_INT32_MODIFIER "" @@ -438,11 +438,11 @@ */ #undef stdint_int64_defined -#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) -# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) +#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined(S_SPLINT_S) +# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined(S_SPLINT_S) # define stdint_int64_defined - typedef long long int64_t; - typedef unsigned long long uint64_t; +typedef long long int64_t; +typedef unsigned long long uint64_t; # define UINT64_C(v) v ## ULL # define INT64_C(v) v ## LL # ifndef PRINTF_INT64_MODIFIER @@ -451,29 +451,29 @@ # endif #endif -#if !defined (stdint_int64_defined) +#if !defined(stdint_int64_defined) # if defined(__GNUC__) # define stdint_int64_defined - __extension__ typedef long long int64_t; - __extension__ typedef unsigned long long uint64_t; +__extension__ typedef long long int64_t; +__extension__ typedef unsigned long long uint64_t; # define UINT64_C(v) v ## ULL # define INT64_C(v) v ## LL # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "ll" # endif -# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) +# elif defined(__MWERKS__) || defined(__SUNPRO_C) || 
defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) || defined(S_SPLINT_S) # define stdint_int64_defined - typedef long long int64_t; - typedef unsigned long long uint64_t; +typedef long long int64_t; +typedef unsigned long long uint64_t; # define UINT64_C(v) v ## ULL # define INT64_C(v) v ## LL # ifndef PRINTF_INT64_MODIFIER # define PRINTF_INT64_MODIFIER "ll" # endif -# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) +# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) # define stdint_int64_defined - typedef __int64 int64_t; - typedef unsigned __int64 uint64_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; # define UINT64_C(v) v ## UI64 # define INT64_C(v) v ## I64 # ifndef PRINTF_INT64_MODIFIER @@ -482,20 +482,20 @@ # endif #endif -#if !defined (LONG_LONG_MAX) && defined (INT64_C) +#if !defined(LONG_LONG_MAX) && defined(INT64_C) # define LONG_LONG_MAX INT64_C (9223372036854775807) #endif #ifndef ULONG_LONG_MAX # define ULONG_LONG_MAX UINT64_C (18446744073709551615) #endif -#if !defined (INT64_MAX) && defined (INT64_C) +#if !defined(INT64_MAX) && defined(INT64_C) # define INT64_MAX INT64_C (9223372036854775807) #endif -#if !defined (INT64_MIN) && defined (INT64_C) +#if !defined(INT64_MIN) && defined(INT64_C) # define INT64_MIN INT64_C (-9223372036854775808) #endif -#if !defined (UINT64_MAX) && defined (INT64_C) +#if !defined(UINT64_MAX) && defined(INT64_C) # define UINT64_MAX UINT64_C (18446744073709551615) #endif @@ -536,8 +536,8 @@ */ #ifdef stdint_int64_defined - typedef int64_t intmax_t; - typedef uint64_t uintmax_t; +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; # define INTMAX_MAX INT64_MAX # define 
INTMAX_MIN INT64_MIN # define UINTMAX_MAX UINT64_MAX @@ -553,8 +553,8 @@ # define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH # endif #else - typedef int32_t intmax_t; - typedef uint32_t uintmax_t; +typedef int32_t intmax_t; +typedef uint32_t uintmax_t; # define INTMAX_MAX INT32_MAX # define UINTMAX_MAX UINT32_MAX # define UINTMAX_C(v) UINT32_C(v) @@ -578,12 +578,12 @@ */ #ifndef stdint_least_defined - typedef int8_t int_least8_t; - typedef uint8_t uint_least8_t; - typedef int16_t int_least16_t; - typedef uint16_t uint_least16_t; - typedef int32_t int_least32_t; - typedef uint32_t uint_least32_t; +typedef int8_t int_least8_t; +typedef uint8_t uint_least8_t; +typedef int16_t int_least16_t; +typedef uint16_t uint_least16_t; +typedef int32_t int_least32_t; +typedef uint32_t uint_least32_t; # define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER # define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER # define UINT_LEAST8_MAX UINT8_MAX @@ -596,8 +596,8 @@ # define INT_LEAST16_MIN INT16_MIN # define INT_LEAST32_MIN INT32_MIN # ifdef stdint_int64_defined - typedef int64_t int_least64_t; - typedef uint64_t uint_least64_t; +typedef int64_t int_least64_t; +typedef uint64_t uint_least64_t; # define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER # define UINT_LEAST64_MAX UINT64_MAX # define INT_LEAST64_MAX INT64_MAX @@ -633,8 +633,8 @@ typedef uint_least32_t uint_fast32_t; #define INT_FAST16_MIN INT_LEAST16_MIN #define INT_FAST32_MIN INT_LEAST32_MIN #ifdef stdint_int64_defined - typedef int_least64_t int_fast64_t; - typedef uint_least64_t uint_fast64_t; +typedef int_least64_t int_fast64_t; +typedef uint_least64_t uint_fast64_t; # define UINT_FAST64_MAX UINT_LEAST64_MAX # define INT_FAST64_MAX INT_LEAST64_MAX # define INT_FAST64_MIN INT_LEAST64_MIN @@ -647,7 +647,7 @@ typedef uint_least32_t uint_fast32_t; * type limits. 
*/ -#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) +#if defined(__WATCOMC__) || defined(_MSC_VER) || defined(__GNUC__) # include # ifndef WCHAR_MIN # define WCHAR_MIN 0 @@ -662,28 +662,28 @@ typedef uint_least32_t uint_fast32_t; * (u)intptr_t types and limits. */ -#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED) +#if defined(_MSC_VER) && defined(_UINTPTR_T_DEFINED) # define STDINT_H_UINTPTR_T_DEFINED #endif #ifndef STDINT_H_UINTPTR_T_DEFINED -# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) +# if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) || defined(_WIN64) # define stdint_intptr_bits 64 -# elif defined (__WATCOMC__) || defined (__TURBOC__) +# elif defined(__WATCOMC__) || defined(__TURBOC__) # if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) # define stdint_intptr_bits 16 # else # define stdint_intptr_bits 32 # endif -# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) +# elif defined(__i386__) || defined(_WIN32) || defined(WIN32) # define stdint_intptr_bits 32 -# elif defined (__INTEL_COMPILER) +# elif defined(__INTEL_COMPILER) /* TODO -- what did Intel do about x86-64? 
*/ # endif # ifdef stdint_intptr_bits -# define stdint_intptr_glue3_i(a,b,c) a##b##c -# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) +# define stdint_intptr_glue3_i(a, b, c) a##b##c +# define stdint_intptr_glue3(a, b, c) stdint_intptr_glue3_i(a,b,c) # ifndef PRINTF_INTPTR_MODIFIER # define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) # endif @@ -708,12 +708,12 @@ typedef uint_least32_t uint_fast32_t; # ifndef UINTPTR_C # define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) # endif - typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; - typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; +typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t; +typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t; # else /* TODO -- This following is likely wrong for some platforms, and does nothing for the definition of uintptr_t. */ - typedef ptrdiff_t intptr_t; +typedef ptrdiff_t intptr_t; # endif # define STDINT_H_UINTPTR_T_DEFINED #endif @@ -728,73 +728,74 @@ typedef uint_least32_t uint_fast32_t; #endif -#if defined (__TEST_PSTDINT_FOR_CORRECTNESS) +#if defined(__TEST_PSTDINT_FOR_CORRECTNESS) -/* +/* * Please compile with the maximum warning settings to make sure macros are not * defined more than once. 
*/ - + #include #include #include - -#define glue3_aux(x,y,z) x ## y ## z -#define glue3(x,y,z) glue3_aux(x,y,z) + +#define glue3_aux(x, y, z) x ## y ## z +#define glue3(x, y, z) glue3_aux(x,y,z) #define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0); #define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0); -#define DECL(us,bits) glue3(DECL,us,) (bits) +#define DECL(us, bits) glue3(DECL,us,) (bits) #define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits) - -int main () { - DECL(I,8) - DECL(U,8) - DECL(I,16) - DECL(U,16) - DECL(I,32) - DECL(U,32) + +int main() +{ + DECL(I, 8) + DECL(U, 8) + DECL(I, 16) + DECL(U, 16) + DECL(I, 32) + DECL(U, 32) +#ifdef INT64_MAX + DECL(I, 64) + DECL(U, 64) +#endif + intmax_t imax = INTMAX_C(0); + uintmax_t umax = UINTMAX_C(0); + char str0[256], str1[256]; + + sprintf(str0, "%d %x\n", 0, ~0); + + sprintf(str1, "%d %x\n", i8, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i8 : %s\n", str1); + sprintf(str1, "%u %x\n", u8, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with u8 : %s\n", str1); + sprintf(str1, "%d %x\n", i16, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i16 : %s\n", str1); + sprintf(str1, "%u %x\n", u16, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with u16 : %s\n", str1); + sprintf(str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i32 : %s\n", str1); + sprintf(str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with u32 : %s\n", str1); #ifdef INT64_MAX - DECL(I,64) - DECL(U,64) -#endif - intmax_t imax = INTMAX_C(0); - uintmax_t umax = UINTMAX_C(0); - char str0[256], str1[256]; - - sprintf (str0, "%d %x\n", 0, ~0); - - sprintf (str1, "%d %x\n", i8, ~0); - if (0 != strcmp (str0, str1)) printf ("Something 
wrong with i8 : %s\n", str1); - sprintf (str1, "%u %x\n", u8, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1); - sprintf (str1, "%d %x\n", i16, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1); - sprintf (str1, "%u %x\n", u16, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1); - sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1); - sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1); -#ifdef INT64_MAX - sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1); -#endif - sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1); - sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); - if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1); - - TESTUMAX(8); - TESTUMAX(16); - TESTUMAX(32); + sprintf(str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i64 : %s\n", str1); +#endif + sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with imax : %s\n", str1); + sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with umax : %s\n", str1); + + TESTUMAX(8); + TESTUMAX(16); + TESTUMAX(32); #ifdef INT64_MAX - TESTUMAX(64); + TESTUMAX(64); #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } #endif diff --git a/src/ControlFlowGraph.cpp b/src/ControlFlowGraph.cpp index 6131924..92f7918 100644 --- a/src/ControlFlowGraph.cpp +++ b/src/ControlFlowGraph.cpp @@ -1,4 +1,3 @@ - #include 
"internal_includes/debug.h" #include "internal_includes/ControlFlowGraph.h" #include "internal_includes/ControlFlowGraphUtils.h" @@ -12,328 +11,324 @@ using HLSLcc::ForEachOperand; const BasicBlock &ControlFlowGraph::Build(const Instruction *firstInstruction) { - using std::for_each; - - m_BlockMap.clear(); - m_BlockStorage.clear(); - - // Self-registering into m_BlockStorage so it goes out of the scope when ControlFlowGraph does - BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL); - - // Build the reachable set for each block - bool hadChanges; - do - { - hadChanges = false; - for_each(m_BlockStorage.begin(), m_BlockStorage.end(), [&](const shared_ptr &bb) - { - BasicBlock &b = *bb.get(); - if (b.RebuildReachable()) - { - hadChanges = true; - } - }); - } while (hadChanges == true); - - return *root; + using std::for_each; + + m_BlockMap.clear(); + m_BlockStorage.clear(); + + // Self-registering into m_BlockStorage so it goes out of the scope when ControlFlowGraph does + BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL); + + // Build the reachable set for each block + bool hadChanges; + do + { + hadChanges = false; + for_each(m_BlockStorage.begin(), m_BlockStorage.end(), [&](const shared_ptr &bb) + { + BasicBlock &b = *bb.get(); + if (b.RebuildReachable()) + { + hadChanges = true; + } + }); + } + while (hadChanges == true); + + return *root; } const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const { - BasicBlockMap::const_iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); - if (itr == m_BlockMap.end()) - return NULL; + BasicBlockMap::const_iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); + if (itr == m_BlockMap.end()) + return NULL; - return itr->second; + return itr->second; } BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction 
*instruction) { - BasicBlockMap::iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); - if (itr == m_BlockMap.end()) - return NULL; + BasicBlockMap::iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); + if (itr == m_BlockMap.end()) + return NULL; - return itr->second; + return itr->second; } - - - // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build(). // Auto-registers itself into ControlFlowGraph BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead) - : m_Graph(graph) - , m_First(psFirst) - , m_Last(NULL) + : m_Graph(graph) + , m_First(psFirst) + , m_Last(NULL) { - m_UEVar.clear(); - m_VarKill.clear(); - m_Preceding.clear(); - m_Succeeding.clear(); - m_DEDef.clear(); - m_Reachable.clear(); - - // Check that we've pruned the labels - ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst)); - - // Insert to block storage, block map and connect to previous block - m_Graph.m_BlockStorage.push_back(shared_ptr(this)); - - bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second; - ASSERT(didInsert); - - if (psPrecedingBlockHead != NULL) - { - m_Preceding.insert(psPrecedingBlockHead); - BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead); - ASSERT(prec != 0); - didInsert = prec->m_Succeeding.insert(psFirst).second; - ASSERT(didInsert); - } - - Build(); + m_UEVar.clear(); + m_VarKill.clear(); + m_Preceding.clear(); + m_Succeeding.clear(); + m_DEDef.clear(); + m_Reachable.clear(); + + // Check that we've pruned the labels + ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst)); + + // Insert to block storage, block map and connect to previous block + m_Graph.m_BlockStorage.push_back(shared_ptr(this)); + + bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second; + ASSERT(didInsert); + + if (psPrecedingBlockHead != NULL) + { 
+ m_Preceding.insert(psPrecedingBlockHead); + BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead); + ASSERT(prec != 0); + didInsert = prec->m_Succeeding.insert(psFirst).second; + ASSERT(didInsert); + } + + Build(); } void BasicBlock::Build() { - const Instruction *inst = m_First; - while (1) - { - // Process sources first - ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, - [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - // Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore - if (m_VarKill.find(regIdx) != m_VarKill.end()) - continue; - - // Add to UEVars set. Doesn't matter if it's already there. - m_UEVar.insert(regIdx); - } - return; - }); - - // Then the destination operands - ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND, - [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - - // Add to kill set. Dupes are fine, this is a set. - m_VarKill.insert(regIdx); - // Also into the downward definitions. 
Overwrite the previous definition in this basic block, if any - Definition d(psInst, psOperand); - m_DEDef[regIdx].clear(); - m_DEDef[regIdx].insert(d); - } - return; - }); - - // Check for flow control instructions - bool blockDone = false; - switch (inst->eOpcode) - { - default: - break; - case OPCODE_RET: - blockDone = true; - break; - case OPCODE_RETC: - // Basic block is done, start a next one. - // There REALLY should be no existing blocks for this one - ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst+1)) == NULL); - AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); - blockDone = true; - break; - case OPCODE_LOOP: - case OPCODE_CASE: - case OPCODE_ENDIF: - case OPCODE_ENDSWITCH: - // Not a flow control branch, but need to start a new block anyway. - AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); - blockDone = true; - break; - - // Branches - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CONTINUEC: - { - const Instruction *jumpPoint = Utils::GetJumpPoint(inst); - ASSERT(jumpPoint != NULL); - - // The control branches to the next instruction or jumps to jumpPoint - AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst+1)); - AddChildBasicBlock(jumpPoint); - - blockDone = true; - break; - } - case OPCODE_SWITCH: - { - bool sawEndSwitch = false; - bool needConnectToParent = false; - const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent); - ASSERT(jumpPoint != NULL); - - while (1) - { - if(!sawEndSwitch || needConnectToParent) - AddChildBasicBlock(jumpPoint); - - if (sawEndSwitch) - break; - - // The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label - ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT); - jumpPoint = Utils::GetJumpPoint(jumpPoint-1, &sawEndSwitch, &needConnectToParent); - ASSERT(jumpPoint != NULL); - } - blockDone = 
true; - break; - } - - // Non-conditional jumps - case OPCODE_BREAK: - case OPCODE_ELSE: - case OPCODE_CONTINUE: - case OPCODE_ENDLOOP: - { - const Instruction *jumpPoint = Utils::GetJumpPoint(inst); - ASSERT(jumpPoint != NULL); - - AddChildBasicBlock(jumpPoint); - - blockDone = true; - break; - } - } - - if (blockDone) - break; - - inst++; - } - // In initial building phase, just make m_Reachable equal to m_DEDef - m_Reachable = m_DEDef; - - // Tag the end of the basic block - m_Last = inst; -// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id); + const Instruction *inst = m_First; + while (1) + { + // Process sources first + ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, + [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + // Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore + if (m_VarKill.find(regIdx) != m_VarKill.end()) + continue; + + // Add to UEVars set. Doesn't matter if it's already there. + m_UEVar.insert(regIdx); + } + return; + }); + + // Then the destination operands + ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND, + [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Add to kill set. Dupes are fine, this is a set. 
+ m_VarKill.insert(regIdx); + // Also into the downward definitions. Overwrite the previous definition in this basic block, if any + Definition d(psInst, psOperand); + m_DEDef[regIdx].clear(); + m_DEDef[regIdx].insert(d); + } + return; + }); + + // Check for flow control instructions + bool blockDone = false; + switch (inst->eOpcode) + { + default: + break; + case OPCODE_RET: + blockDone = true; + break; + case OPCODE_RETC: + // Basic block is done, start a next one. + // There REALLY should be no existing blocks for this one + ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst + 1)) == NULL); + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + blockDone = true; + break; + case OPCODE_LOOP: + case OPCODE_CASE: + case OPCODE_ENDIF: + case OPCODE_ENDSWITCH: + // Not a flow control branch, but need to start a new block anyway. + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + blockDone = true; + break; + + // Branches + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + { + const Instruction *jumpPoint = Utils::GetJumpPoint(inst); + ASSERT(jumpPoint != NULL); + + // The control branches to the next instruction or jumps to jumpPoint + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + AddChildBasicBlock(jumpPoint); + + blockDone = true; + break; + } + case OPCODE_SWITCH: + { + bool sawEndSwitch = false; + bool needConnectToParent = false; + const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent); + ASSERT(jumpPoint != NULL); + + while (1) + { + if (!sawEndSwitch || needConnectToParent) + AddChildBasicBlock(jumpPoint); + + if (sawEndSwitch) + break; + + // The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label + ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT); + jumpPoint = Utils::GetJumpPoint(jumpPoint - 1, 
&sawEndSwitch, &needConnectToParent); + ASSERT(jumpPoint != NULL); + } + blockDone = true; + break; + } + + // Non-conditional jumps + case OPCODE_BREAK: + case OPCODE_ELSE: + case OPCODE_CONTINUE: + case OPCODE_ENDLOOP: + { + const Instruction *jumpPoint = Utils::GetJumpPoint(inst); + ASSERT(jumpPoint != NULL); + + AddChildBasicBlock(jumpPoint); + + blockDone = true; + break; + } + } + + if (blockDone) + break; + + inst++; + } + // In initial building phase, just make m_Reachable equal to m_DEDef + m_Reachable = m_DEDef; + + // Tag the end of the basic block + m_Last = inst; +// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id); } - BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst) { - // First see if this already exists - BasicBlock *b = m_Graph.GetBasicBlockForInstruction(psFirst); - if (b) - { - // Just add dependency and we're done - b->m_Preceding.insert(m_First); - m_Succeeding.insert(psFirst); - return b; - } - // Otherwise create one. Self-registering and self-connecting - return new BasicBlock(psFirst, m_Graph, m_First); + // First see if this already exists + BasicBlock *b = m_Graph.GetBasicBlockForInstruction(psFirst); + if (b) + { + // Just add dependency and we're done + b->m_Preceding.insert(m_First); + m_Succeeding.insert(psFirst); + return b; + } + // Otherwise create one. Self-registering and self-connecting + return new BasicBlock(psFirst, m_Graph, m_First); } bool BasicBlock::RebuildReachable() { - // Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes. 
- // Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill()) - - ReachableVariables newReachable = m_DEDef; - bool hasChanges = false; - - // Loop each predecessor - std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr) - { - const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr); - const ReachableVariables &precReachable = prec->Reachable(); - - // Loop each variable*component - std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair &itr2) - { - uint32_t regIdx = itr2.first; - const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second; - - // Already killed in this block? - if (VarKill().find(regIdx) != VarKill().end()) - return; - - // Only do comparisons against current definitions if we've yet to find any changes - BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0; - if (!hasChanges) - currReachablePerVar = &m_Reachable[regIdx]; - - BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx]; - - // Loop each definition - std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d) - { - if (!hasChanges) - { - // Check if already there - if (currReachablePerVar->find(d) == currReachablePerVar->end()) - hasChanges = true; - } - newReachablePerVar.insert(d); - }); // definition - - }); // variable*component - }); // predecessor - - if (hasChanges) - { - std::swap(m_Reachable, newReachable); - } - - return hasChanges; + // Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes. 
+ // Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill()) + + ReachableVariables newReachable = m_DEDef; + bool hasChanges = false; + + // Loop each predecessor + std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr) + { + const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr); + const ReachableVariables &precReachable = prec->Reachable(); + + // Loop each variable*component + std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair &itr2) + { + uint32_t regIdx = itr2.first; + const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second; + + // Already killed in this block? + if (VarKill().find(regIdx) != VarKill().end()) + return; + + // Only do comparisons against current definitions if we've yet to find any changes + BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0; + if (!hasChanges) + currReachablePerVar = &m_Reachable[regIdx]; + + BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx]; + + // Loop each definition + std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d) + { + if (!hasChanges) + { + // Check if already there + if (currReachablePerVar->find(d) == currReachablePerVar->end()) + hasChanges = true; + } + newReachablePerVar.insert(d); + }); // definition + }); // variable*component + }); // predecessor + + if (hasChanges) + { + std::swap(m_Reachable, newReachable); + } + + return hasChanges; } void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) { - std::for_each(b.begin(), b.end(), [&a](const std::pair &rpvPair) - { - uint32_t regIdx = rpvPair.first; - const ReachableDefinitionsPerVariable &rpv = rpvPair.second; - // No previous definitions for this variable? 
- auto aRPVItr = a.find(regIdx); - if (aRPVItr == a.end()) - { - // Just set the definitions and continue - a[regIdx] = rpv; - return; - } - ReachableDefinitionsPerVariable &aRPV = aRPVItr->second; - aRPV.insert(rpv.begin(), rpv.end()); - }); + std::for_each(b.begin(), b.end(), [&a](const std::pair &rpvPair) + { + uint32_t regIdx = rpvPair.first; + const ReachableDefinitionsPerVariable &rpv = rpvPair.second; + // No previous definitions for this variable? + auto aRPVItr = a.find(regIdx); + if (aRPVItr == a.end()) + { + // Just set the definitions and continue + a[regIdx] = rpv; + return; + } + ReachableDefinitionsPerVariable &aRPV = aRPVItr->second; + aRPV.insert(rpv.begin(), rpv.end()); + }); } #if ENABLE_UNIT_TESTS @@ -343,482 +338,475 @@ void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) UNIT_TEST_SUITE(HLSLcc) { - TEST(ControlFlowGraph_Build_Simple_Works) - { - Instruction inst[] = - { - // MOV t0.xyzw, I0.xyzw - Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf), - Instruction(1, OPCODE_RET) - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); - - CHECK_EQUAL(&inst[0], root.First()); - CHECK_EQUAL(&inst[1], root.Last()); - - CHECK(root.Preceding().empty()); - CHECK(root.Succeeding().empty()); - - CHECK_EQUAL(4, root.VarKill().size()); - - // Check that all components from t0 are killed - CHECK_EQUAL(1, root.VarKill().count(0)); - CHECK_EQUAL(1, root.VarKill().count(1)); - CHECK_EQUAL(1, root.VarKill().count(2)); - CHECK_EQUAL(1, root.VarKill().count(3)); - - CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand); - CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand); - CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], 
root.DEDef().find(2)->second.begin()->m_Operand); - CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand); - - } - - TEST(ControlFlowGraph_Build_If_Works) - { - Instruction inst[] = - { - // B0 - // 0: MOV t1.xyzw, i0.xyzw - Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf), - // 1: MUL t0, t1, t1 - Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf), - // 2: IF t1.y - Instruction(2, OPCODE_IF, 1, 2), - // B1 - // 3: MOV o0, t0 - Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf), - // 4: - Instruction(4, OPCODE_ELSE), - // B2 - // 5: MOV o0, t1 - Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), - // 6: - Instruction(6, OPCODE_ENDIF), - // B3 - // 7: - Instruction(7, OPCODE_NOP), - // 8: - Instruction(8, OPCODE_RET) - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); - - CHECK_EQUAL(root.First(), &inst[0]); - CHECK_EQUAL(root.Last(), &inst[2]); - - CHECK(root.Preceding().empty()); - - const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); - const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); - const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]); - - CHECK(b1 != NULL); - CHECK(b2 != NULL); - CHECK(b3 != NULL); - - CHECK_EQUAL(&inst[3], b1->First()); - CHECK_EQUAL(&inst[5], b2->First()); - CHECK_EQUAL(&inst[7], b3->First()); - - CHECK_EQUAL(&inst[4], b1->Last()); - CHECK_EQUAL(&inst[6], b2->Last()); - CHECK_EQUAL(&inst[8], b3->Last()); - - CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[5])); - CHECK_EQUAL(2, root.Succeeding().size()); - - CHECK_EQUAL(1, b1->Preceding().size()); - CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); - - CHECK_EQUAL(1, b2->Preceding().size()); - CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); - - CHECK_EQUAL(2, b3->Preceding().size()); - CHECK_EQUAL(0, b3->Preceding().count(&inst[0])); - CHECK_EQUAL(1, 
b3->Preceding().count(&inst[3])); - CHECK_EQUAL(1, b3->Preceding().count(&inst[5])); - - // The if block must have upwards-exposed t0 - CHECK_EQUAL(1, b1->UEVar().count(0)); - CHECK_EQUAL(1, b1->UEVar().count(1)); - CHECK_EQUAL(1, b1->UEVar().count(2)); - CHECK_EQUAL(1, b1->UEVar().count(3)); - - // The else block must have upwards-exposed t1 - CHECK_EQUAL(1, b2->UEVar().count(4)); - CHECK_EQUAL(1, b2->UEVar().count(5)); - CHECK_EQUAL(1, b2->UEVar().count(6)); - CHECK_EQUAL(1, b2->UEVar().count(7)); - - CHECK_EQUAL(8, root.VarKill().size()); - - // Check that all components from t0 and t1 are killed - CHECK_EQUAL(1, root.VarKill().count(0)); - CHECK_EQUAL(1, root.VarKill().count(1)); - CHECK_EQUAL(1, root.VarKill().count(2)); - CHECK_EQUAL(1, root.VarKill().count(3)); - - CHECK_EQUAL(1, root.VarKill().count(4)); - CHECK_EQUAL(1, root.VarKill().count(5)); - CHECK_EQUAL(1, root.VarKill().count(6)); - CHECK_EQUAL(1, root.VarKill().count(7)); - - // The expected downwards-exposed definitions: - // B0: t0, t1 - // B1-B3: none - - CHECK_EQUAL(8, root.DEDef().size()); - CHECK_EQUAL(0, b1->DEDef().size()); - CHECK_EQUAL(0, b2->DEDef().size()); - CHECK_EQUAL(0, b3->DEDef().size()); - - CHECK(root.DEDef()==root.Reachable()); - - CHECK(root.Reachable()==b1->Reachable()); - CHECK(root.Reachable()==b2->Reachable()); - CHECK(root.Reachable()==b3->Reachable()); - - - } - - TEST(ControlFlowGraph_Build_SwitchCase_Works) - { - Instruction inst[] = - { - // Start B0 - // i0: MOV t0.x, I0.x - Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), - // i1: MOVE t1.xyz, I0.yzw - Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe), - // i2: MOVE t1.w, t0.x - Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1), - // i3: MOVE t2, I0 - Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf), - // i4: SWITCH t0.y - Instruction(4, OPCODE_SWITCH, 1, 2), - // End B0 - // i5: CASE - Instruction(5, OPCODE_CASE), - // i6: DEFAULT - Instruction(6, OPCODE_DEFAULT), - // Start B1 - // i7: MOC t1.z, t0.x - 
Instruction(7, OPCODE_MOV, 1, 4, 0, 1), - // i8: CASE - Instruction(8, OPCODE_CASE), - // End B1 - // Start B2 - // i9: MOV t1.z, t2.x - Instruction(9, OPCODE_MOV, 1, 4, 2, 1), - // i10: BREAK - Instruction(10, OPCODE_BREAK), - // End B2 - // i11: CASE - Instruction(11, OPCODE_CASE), - // Start B3 - // i12: MOV t1.z, t2.y - Instruction(12, OPCODE_MOV, 1, 4, 2, 2), - // i13: BREAKC t0.x - Instruction(13, OPCODE_BREAKC, 0, 1), - // End B3 - // i14: CASE - Instruction(14, OPCODE_CASE), - // Start B4 - // i15: MOV t1.z, t2.z - Instruction(15, OPCODE_MOV, 1, 4, 2, 4), - // i16: ENDSWITCH - Instruction(16, OPCODE_ENDSWITCH), - // End B4 - // Start B5 - // i17: MOV o0, t1 - Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), - // i18: RET - Instruction(18, OPCODE_RET) - // End B5 - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); - - CHECK_EQUAL(&inst[0], root.First()); - CHECK_EQUAL(&inst[4], root.Last()); - - const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]); - const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]); - const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]); - const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]); - const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]); - - CHECK(b1 != NULL); - CHECK(b2 != NULL); - CHECK(b3 != NULL); - CHECK(b4 != NULL); - CHECK(b5 != NULL); - - // Check instruction ranges - CHECK_EQUAL(&inst[8], b1->Last()); - CHECK_EQUAL(&inst[10], b2->Last()); - CHECK_EQUAL(&inst[13], b3->Last()); - CHECK_EQUAL(&inst[16], b4->Last()); - CHECK_EQUAL(&inst[18], b5->Last()); - - // Nothing before the root, nothing after b5 - CHECK(root.Preceding().empty()); - CHECK(b5->Succeeding().empty()); - - // Check that all connections are there and no others. 
- - // B0->B1 - // B0->B2 - // B0->B3 - // B0->B4 - CHECK_EQUAL(1, root.Succeeding().count(&inst[7])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[9])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[12])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[15])); - - CHECK_EQUAL(4, root.Succeeding().size()); - - // B1 - - // B1->B2 - CHECK_EQUAL(1, b1->Succeeding().count(&inst[9])); - CHECK_EQUAL(1, b1->Succeeding().size()); - - // B0->B1, reverse - CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); - CHECK_EQUAL(1, b1->Preceding().size()); - - // B2 - - // B2->B5 - CHECK_EQUAL(1, b2->Succeeding().count(&inst[17])); - CHECK_EQUAL(1, b2->Succeeding().size()); - CHECK_EQUAL(1, b2->Preceding().count(&inst[7])); - CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); - CHECK_EQUAL(2, b2->Preceding().size()); - - // B3 - // B3->B4 - // B3->B5 - CHECK_EQUAL(1, b3->Succeeding().count(&inst[15])); - CHECK_EQUAL(1, b3->Succeeding().count(&inst[17])); - CHECK_EQUAL(2, b3->Succeeding().size()); - CHECK_EQUAL(1, b3->Preceding().count(&inst[0])); - CHECK_EQUAL(1, b3->Preceding().size()); - - // B4 - CHECK_EQUAL(1, b4->Succeeding().count(&inst[17])); - CHECK_EQUAL(1, b4->Succeeding().size()); - CHECK_EQUAL(1, b4->Preceding().count(&inst[0])); - CHECK_EQUAL(2, b4->Preceding().size()); - - // B5 - CHECK_EQUAL(0, b5->Succeeding().size()); - CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4 - CHECK_EQUAL(1, b5->Preceding().count(&inst[9])); - CHECK_EQUAL(1, b5->Preceding().count(&inst[12])); - CHECK_EQUAL(1, b5->Preceding().count(&inst[15])); - - - // Verify reachable sets - - CHECK(root.Reachable() == root.DEDef()); - CHECK_EQUAL(9, root.Reachable().size()); - - // B5 should have these reachables: - // t0.x only from b0 - // t1.xy from b0, i1 - // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2) - // t1.w from b0, i2 - // t2.xyzw from b0, i3 - - // Cast away const so [] works. 
- BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable(); - - CHECK_EQUAL(9, r.size()); - - CHECK_EQUAL(1, r[0].size()); - CHECK_EQUAL(0, r[1].size()); - CHECK_EQUAL(0, r[2].size()); - CHECK_EQUAL(0, r[3].size()); - CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction); - - CHECK_EQUAL(1, r[4].size()); - CHECK_EQUAL(1, r[5].size()); - CHECK_EQUAL(3, r[6].size()); - CHECK_EQUAL(1, r[7].size()); - - const BasicBlock::ReachableDefinitionsPerVariable &d = r[6]; - BasicBlock::ReachableDefinitionsPerVariable t; - t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0])); - t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0])); - t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0])); - - CHECK(t == d); - - CHECK_EQUAL(1, r[8].size()); - CHECK_EQUAL(1, r[9].size()); - CHECK_EQUAL(1, r[10].size()); - CHECK_EQUAL(1, r[11].size()); - - - } - - TEST(ControlFlowGraph_Build_Loop_Works) - { - Instruction inst[] = - { - // Start B0 - // i0: MOV t0.x, I0.x - Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), - // i1: MOVE t1.xy, I0.zw // The .x definition should not make it past the loop, .y should. 
- Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc), - // i2: LOOP - Instruction(2, OPCODE_LOOP, 1, 2), - // End B0 -> B1 - // Begin B1 - // i3: MOV t1.x, t0.x - Instruction(3, OPCODE_MOV, 1, 1, 0, 1), - // i4: BREAKC t0.x - Instruction(4, OPCODE_BREAKC, 0, 1), - // End B1 -> B2, B3 - // Begin B2 - // i5: ADD t0.x, t0.y - Instruction(5, OPCODE_ADD, 0, 1, 0, 2), - // i6: MOV t1.x, t0.x // This should never show up as definition - Instruction(6, OPCODE_MOV, 1, 1, 0, 1), - // i7: ENDLOOP - Instruction(7, OPCODE_ENDLOOP), - // End B2 -> B1 - // Start B3 - // i8: MOV O0.x, t1.x - Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1), - // i9: RET - Instruction(9, OPCODE_RET), - // End B3 - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst); - - CHECK_EQUAL(&inst[0], root.First()); - CHECK_EQUAL(&inst[2], root.Last()); - - const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); - const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); - const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]); - - CHECK(b1 != NULL); - CHECK(b2 != NULL); - CHECK(b3 != NULL); - - // Check instruction ranges - CHECK_EQUAL(&inst[4], b1->Last()); - CHECK_EQUAL(&inst[7], b2->Last()); - CHECK_EQUAL(&inst[9], b3->Last()); - - // Nothing before the root, nothing after b3 - CHECK(root.Preceding().empty()); - CHECK(b3->Succeeding().empty()); - - // Check that all connections are there and no others. 
- - // B0->B1 - CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); - CHECK_EQUAL(1, root.Succeeding().size()); - - // B1 - - // B1->B2 - // B1->B3 - CHECK_EQUAL(1, b1->Succeeding().count(&inst[5])); - CHECK_EQUAL(1, b1->Succeeding().count(&inst[8])); - CHECK_EQUAL(2, b1->Succeeding().size()); - - // B0->B1, reverse - CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); - // We may also come from B2 - CHECK_EQUAL(1, b1->Preceding().count(&inst[5])); - CHECK_EQUAL(2, b1->Preceding().size()); - - // B2 - - // B2->B1 - CHECK_EQUAL(1, b2->Succeeding().count(&inst[3])); - CHECK_EQUAL(1, b2->Succeeding().size()); - CHECK_EQUAL(1, b2->Preceding().count(&inst[3])); - CHECK_EQUAL(1, b2->Preceding().size()); - - // B3 - CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); - CHECK_EQUAL(1, b3->Preceding().size()); - - // Verify reachable sets - - - BasicBlock::ReachableVariables t; - - // B0 DEDef and Reachable - t.clear(); - t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); - t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - - CHECK(root.DEDef() == t); - CHECK(root.Reachable() == root.DEDef()); - - // B1 DEDef and Reachable - t.clear(); - t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); - CHECK(b1->DEDef() == t); - - t = b1->DEDef(); - // t0.x from i0, t1.y (but not .x) from i1 - t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - - // t0.x from i5, but nothing from i6 - t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); - CHECK(b1->Reachable() == t); - - // B2 - t.clear(); - t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); - t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0])); - CHECK(b2->DEDef() == t); - - t = b2->DEDef(); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); 
- - CHECK(b2->Reachable() == t); - - // B3 - t.clear(); - CHECK(b3->DEDef() == t); - // t0.x from i0, t1.y from i1 - t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - - // t1.x from i3 - t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); - - // t0.x from i5 - t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); - - CHECK(b3->Reachable() == t); - } + TEST(ControlFlowGraph_Build_Simple_Works) + { + Instruction inst[] = + { + // MOV t0.xyzw, I0.xyzw + Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf), + Instruction(1, OPCODE_RET) + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[1], root.Last()); + + CHECK(root.Preceding().empty()); + CHECK(root.Succeeding().empty()); + + CHECK_EQUAL(4, root.VarKill().size()); + + // Check that all components from t0 are killed + CHECK_EQUAL(1, root.VarKill().count(0)); + CHECK_EQUAL(1, root.VarKill().count(1)); + CHECK_EQUAL(1, root.VarKill().count(2)); + CHECK_EQUAL(1, root.VarKill().count(3)); + + CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand); + } + + TEST(ControlFlowGraph_Build_If_Works) + { + Instruction inst[] = + { + // B0 + // 0: MOV t1.xyzw, i0.xyzw + Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf), + 
// 1: MUL t0, t1, t1 + Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf), + // 2: IF t1.y + Instruction(2, OPCODE_IF, 1, 2), + // B1 + // 3: MOV o0, t0 + Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf), + // 4: + Instruction(4, OPCODE_ELSE), + // B2 + // 5: MOV o0, t1 + Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), + // 6: + Instruction(6, OPCODE_ENDIF), + // B3 + // 7: + Instruction(7, OPCODE_NOP), + // 8: + Instruction(8, OPCODE_RET) + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(root.First(), &inst[0]); + CHECK_EQUAL(root.Last(), &inst[2]); + + CHECK(root.Preceding().empty()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + + CHECK_EQUAL(&inst[3], b1->First()); + CHECK_EQUAL(&inst[5], b2->First()); + CHECK_EQUAL(&inst[7], b3->First()); + + CHECK_EQUAL(&inst[4], b1->Last()); + CHECK_EQUAL(&inst[6], b2->Last()); + CHECK_EQUAL(&inst[8], b3->Last()); + + CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[5])); + CHECK_EQUAL(2, root.Succeeding().size()); + + CHECK_EQUAL(1, b1->Preceding().size()); + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + + CHECK_EQUAL(1, b2->Preceding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); + + CHECK_EQUAL(2, b3->Preceding().size()); + CHECK_EQUAL(0, b3->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b3->Preceding().count(&inst[5])); + + // The if block must have upwards-exposed t0 + CHECK_EQUAL(1, b1->UEVar().count(0)); + CHECK_EQUAL(1, b1->UEVar().count(1)); + CHECK_EQUAL(1, b1->UEVar().count(2)); + CHECK_EQUAL(1, b1->UEVar().count(3)); + + // The else block must have upwards-exposed t1 + CHECK_EQUAL(1, b2->UEVar().count(4)); + CHECK_EQUAL(1, 
b2->UEVar().count(5)); + CHECK_EQUAL(1, b2->UEVar().count(6)); + CHECK_EQUAL(1, b2->UEVar().count(7)); + + CHECK_EQUAL(8, root.VarKill().size()); + + // Check that all components from t0 and t1 are killed + CHECK_EQUAL(1, root.VarKill().count(0)); + CHECK_EQUAL(1, root.VarKill().count(1)); + CHECK_EQUAL(1, root.VarKill().count(2)); + CHECK_EQUAL(1, root.VarKill().count(3)); + + CHECK_EQUAL(1, root.VarKill().count(4)); + CHECK_EQUAL(1, root.VarKill().count(5)); + CHECK_EQUAL(1, root.VarKill().count(6)); + CHECK_EQUAL(1, root.VarKill().count(7)); + + // The expected downwards-exposed definitions: + // B0: t0, t1 + // B1-B3: none + + CHECK_EQUAL(8, root.DEDef().size()); + CHECK_EQUAL(0, b1->DEDef().size()); + CHECK_EQUAL(0, b2->DEDef().size()); + CHECK_EQUAL(0, b3->DEDef().size()); + + CHECK(root.DEDef() == root.Reachable()); + + CHECK(root.Reachable() == b1->Reachable()); + CHECK(root.Reachable() == b2->Reachable()); + CHECK(root.Reachable() == b3->Reachable()); + } + + TEST(ControlFlowGraph_Build_SwitchCase_Works) + { + Instruction inst[] = + { + // Start B0 + // i0: MOV t0.x, I0.x + Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), + // i1: MOVE t1.xyz, I0.yzw + Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe), + // i2: MOVE t1.w, t0.x + Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1), + // i3: MOVE t2, I0 + Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf), + // i4: SWITCH t0.y + Instruction(4, OPCODE_SWITCH, 1, 2), + // End B0 + // i5: CASE + Instruction(5, OPCODE_CASE), + // i6: DEFAULT + Instruction(6, OPCODE_DEFAULT), + // Start B1 + // i7: MOC t1.z, t0.x + Instruction(7, OPCODE_MOV, 1, 4, 0, 1), + // i8: CASE + Instruction(8, OPCODE_CASE), + // End B1 + // Start B2 + // i9: MOV t1.z, t2.x + Instruction(9, OPCODE_MOV, 1, 4, 2, 1), + // i10: BREAK + Instruction(10, OPCODE_BREAK), + // End B2 + // i11: CASE + Instruction(11, OPCODE_CASE), + // Start B3 + // i12: MOV t1.z, t2.y + Instruction(12, OPCODE_MOV, 1, 4, 2, 2), + // i13: BREAKC t0.x + Instruction(13, 
OPCODE_BREAKC, 0, 1), + // End B3 + // i14: CASE + Instruction(14, OPCODE_CASE), + // Start B4 + // i15: MOV t1.z, t2.z + Instruction(15, OPCODE_MOV, 1, 4, 2, 4), + // i16: ENDSWITCH + Instruction(16, OPCODE_ENDSWITCH), + // End B4 + // Start B5 + // i17: MOV o0, t1 + Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), + // i18: RET + Instruction(18, OPCODE_RET) + // End B5 + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[4], root.Last()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]); + const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]); + const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + CHECK(b4 != NULL); + CHECK(b5 != NULL); + + // Check instruction ranges + CHECK_EQUAL(&inst[8], b1->Last()); + CHECK_EQUAL(&inst[10], b2->Last()); + CHECK_EQUAL(&inst[13], b3->Last()); + CHECK_EQUAL(&inst[16], b4->Last()); + CHECK_EQUAL(&inst[18], b5->Last()); + + // Nothing before the root, nothing after b5 + CHECK(root.Preceding().empty()); + CHECK(b5->Succeeding().empty()); + + // Check that all connections are there and no others. 
+ + // B0->B1 + // B0->B2 + // B0->B3 + // B0->B4 + CHECK_EQUAL(1, root.Succeeding().count(&inst[7])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[9])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[12])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[15])); + + CHECK_EQUAL(4, root.Succeeding().size()); + + // B1 + + // B1->B2 + CHECK_EQUAL(1, b1->Succeeding().count(&inst[9])); + CHECK_EQUAL(1, b1->Succeeding().size()); + + // B0->B1, reverse + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b1->Preceding().size()); + + // B2 + + // B2->B5 + CHECK_EQUAL(1, b2->Succeeding().count(&inst[17])); + CHECK_EQUAL(1, b2->Succeeding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[7])); + CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); + CHECK_EQUAL(2, b2->Preceding().size()); + + // B3 + // B3->B4 + // B3->B5 + CHECK_EQUAL(1, b3->Succeeding().count(&inst[15])); + CHECK_EQUAL(1, b3->Succeeding().count(&inst[17])); + CHECK_EQUAL(2, b3->Succeeding().size()); + CHECK_EQUAL(1, b3->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b3->Preceding().size()); + + // B4 + CHECK_EQUAL(1, b4->Succeeding().count(&inst[17])); + CHECK_EQUAL(1, b4->Succeeding().size()); + CHECK_EQUAL(1, b4->Preceding().count(&inst[0])); + CHECK_EQUAL(2, b4->Preceding().size()); + + // B5 + CHECK_EQUAL(0, b5->Succeeding().size()); + CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4 + CHECK_EQUAL(1, b5->Preceding().count(&inst[9])); + CHECK_EQUAL(1, b5->Preceding().count(&inst[12])); + CHECK_EQUAL(1, b5->Preceding().count(&inst[15])); + + + // Verify reachable sets + + CHECK(root.Reachable() == root.DEDef()); + CHECK_EQUAL(9, root.Reachable().size()); + + // B5 should have these reachables: + // t0.x only from b0 + // t1.xy from b0, i1 + // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2) + // t1.w from b0, i2 + // t2.xyzw from b0, i3 + + // Cast away const so [] works. 
+ BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable(); + + CHECK_EQUAL(9, r.size()); + + CHECK_EQUAL(1, r[0].size()); + CHECK_EQUAL(0, r[1].size()); + CHECK_EQUAL(0, r[2].size()); + CHECK_EQUAL(0, r[3].size()); + CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction); + + CHECK_EQUAL(1, r[4].size()); + CHECK_EQUAL(1, r[5].size()); + CHECK_EQUAL(3, r[6].size()); + CHECK_EQUAL(1, r[7].size()); + + const BasicBlock::ReachableDefinitionsPerVariable &d = r[6]; + BasicBlock::ReachableDefinitionsPerVariable t; + t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0])); + t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0])); + t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0])); + + CHECK(t == d); + + CHECK_EQUAL(1, r[8].size()); + CHECK_EQUAL(1, r[9].size()); + CHECK_EQUAL(1, r[10].size()); + CHECK_EQUAL(1, r[11].size()); + } + + TEST(ControlFlowGraph_Build_Loop_Works) + { + Instruction inst[] = + { + // Start B0 + // i0: MOV t0.x, I0.x + Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), + // i1: MOVE t1.xy, I0.zw // The .x definition should not make it past the loop, .y should. 
+ Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc), + // i2: LOOP + Instruction(2, OPCODE_LOOP, 1, 2), + // End B0 -> B1 + // Begin B1 + // i3: MOV t1.x, t0.x + Instruction(3, OPCODE_MOV, 1, 1, 0, 1), + // i4: BREAKC t0.x + Instruction(4, OPCODE_BREAKC, 0, 1), + // End B1 -> B2, B3 + // Begin B2 + // i5: ADD t0.x, t0.y + Instruction(5, OPCODE_ADD, 0, 1, 0, 2), + // i6: MOV t1.x, t0.x // This should never show up as definition + Instruction(6, OPCODE_MOV, 1, 1, 0, 1), + // i7: ENDLOOP + Instruction(7, OPCODE_ENDLOOP), + // End B2 -> B1 + // Start B3 + // i8: MOV O0.x, t1.x + Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1), + // i9: RET + Instruction(9, OPCODE_RET), + // End B3 + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[2], root.Last()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + + // Check instruction ranges + CHECK_EQUAL(&inst[4], b1->Last()); + CHECK_EQUAL(&inst[7], b2->Last()); + CHECK_EQUAL(&inst[9], b3->Last()); + + // Nothing before the root, nothing after b3 + CHECK(root.Preceding().empty()); + CHECK(b3->Succeeding().empty()); + + // Check that all connections are there and no others. 
+ + // B0->B1 + CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); + CHECK_EQUAL(1, root.Succeeding().size()); + + // B1 + + // B1->B2 + // B1->B3 + CHECK_EQUAL(1, b1->Succeeding().count(&inst[5])); + CHECK_EQUAL(1, b1->Succeeding().count(&inst[8])); + CHECK_EQUAL(2, b1->Succeeding().size()); + + // B0->B1, reverse + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + // We may also come from B2 + CHECK_EQUAL(1, b1->Preceding().count(&inst[5])); + CHECK_EQUAL(2, b1->Preceding().size()); + + // B2 + + // B2->B1 + CHECK_EQUAL(1, b2->Succeeding().count(&inst[3])); + CHECK_EQUAL(1, b2->Succeeding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b2->Preceding().size()); + + // B3 + CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b3->Preceding().size()); + + // Verify reachable sets + + + BasicBlock::ReachableVariables t; + + // B0 DEDef and Reachable + t.clear(); + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + CHECK(root.DEDef() == t); + CHECK(root.Reachable() == root.DEDef()); + + // B1 DEDef and Reachable + t.clear(); + t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); + CHECK(b1->DEDef() == t); + + t = b1->DEDef(); + // t0.x from i0, t1.y (but not .x) from i1 + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + // t0.x from i5, but nothing from i6 + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + CHECK(b1->Reachable() == t); + + // B2 + t.clear(); + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0])); + CHECK(b2->DEDef() == t); + + t = b2->DEDef(); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); 
+ CHECK(b2->Reachable() == t); + + // B3 + t.clear(); + CHECK(b3->DEDef() == t); + // t0.x from i0, t1.y from i1 + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + // t1.x from i3 + t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); + + // t0.x from i5 + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + + CHECK(b3->Reachable() == t); + } } #endif - diff --git a/src/ControlFlowGraphUtils.cpp b/src/ControlFlowGraphUtils.cpp index 648b469..b74fbed 100644 --- a/src/ControlFlowGraphUtils.cpp +++ b/src/ControlFlowGraphUtils.cpp @@ -1,4 +1,3 @@ - #include "ControlFlowGraphUtils.h" #include "internal_includes/debug.h" @@ -6,22 +5,20 @@ #include "internal_includes/Operand.h" - // Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/) { - const Instruction *inst = psStart; - // Skip CASE/DEFAULT/ENDSWITCH/LOOP labels - while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP) - { - // We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it) - ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL); - if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL) - *sawEndSwitch = true; - inst++; - } - return inst; - + const Instruction *inst = psStart; + // Skip CASE/DEFAULT/ENDSWITCH/LOOP labels + while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP) + { + // We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it) + ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL); + if (inst->eOpcode == OPCODE_ENDSWITCH && 
sawEndSwitch != NULL) + *sawEndSwitch = true; + inst++; + } + return inst; } // For a given flow-control instruction, find the corresponding jump location: @@ -37,85 +34,83 @@ const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const // Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/) { - const Instruction *inst = psStart; - int depth = 0; - OPCODE_TYPE op = psStart->eOpcode; - ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC - || op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT - || op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC); + const Instruction *inst = psStart; + int depth = 0; + OPCODE_TYPE op = psStart->eOpcode; + ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC + || op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT + || op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC); - switch (op) - { - default: - ASSERT(0); - break; - case OPCODE_IF: - case OPCODE_ELSE: - while (1) - { - inst++; - if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0)) - { - return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); - } - if (inst->eOpcode == OPCODE_IF) - depth++; - if (inst->eOpcode == OPCODE_ENDIF) - depth--; - } - case OPCODE_BREAK: - case OPCODE_BREAKC: - while (1) - { - inst++; - if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) - { - return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); - } - if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP) - depth++; - if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP) - depth--; - } - case OPCODE_CONTINUE: - case OPCODE_CONTINUEC: - case 
OPCODE_ENDLOOP: - while (1) - { - inst--; - if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0)) - { - return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); - } - if (inst->eOpcode == OPCODE_LOOP) - depth--; - if (inst->eOpcode == OPCODE_ENDLOOP) - depth++; - } - case OPCODE_SWITCH: - case OPCODE_CASE: - case OPCODE_DEFAULT: - while (1) - { - inst++; - if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) - { - // Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH - // so that BasicBlock::Build can distinguish between there being a direct route - // from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not. + switch (op) + { + default: + ASSERT(0); + break; + case OPCODE_IF: + case OPCODE_ELSE: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_IF) + depth++; + if (inst->eOpcode == OPCODE_ENDIF) + depth--; + } + case OPCODE_BREAK: + case OPCODE_BREAKC: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP) + depth++; + if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP) + depth--; + } + case OPCODE_CONTINUE: + case OPCODE_CONTINUEC: + case OPCODE_ENDLOOP: + while (1) + { + inst--; + if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_LOOP) + depth--; + if (inst->eOpcode == OPCODE_ENDLOOP) + depth++; + } + case OPCODE_SWITCH: + case OPCODE_CASE: + case OPCODE_DEFAULT: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode 
== OPCODE_ENDSWITCH) && (depth == 0)) + { + // Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH + // so that BasicBlock::Build can distinguish between there being a direct route + // from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not. - if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0) - *sawEndSwitch = true; + if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0) + *sawEndSwitch = true; - return GetNextNonLabelInstruction(inst + 1, needConnectToParent); - } - if (inst->eOpcode == OPCODE_SWITCH) - depth++; - if (inst->eOpcode == OPCODE_ENDSWITCH) - depth--; - } - - } - return 0; + return GetNextNonLabelInstruction(inst + 1, needConnectToParent); + } + if (inst->eOpcode == OPCODE_SWITCH) + depth++; + if (inst->eOpcode == OPCODE_ENDSWITCH) + depth--; + } + } + return 0; } - diff --git a/src/DataTypeAnalysis.cpp b/src/DataTypeAnalysis.cpp index 2378ccc..8fa463f 100644 --- a/src/DataTypeAnalysis.cpp +++ b/src/DataTypeAnalysis.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/debug.h" #include "internal_includes/tokens.h" #include "internal_includes/HLSLccToolkit.h" @@ -15,768 +14,758 @@ // to the "highest" type value (ordering int->uint->float) static void SetVectorType(std::vector &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress) { - int i = 0; - - // Expand the mask to include all components that are used, also upgrade type - for (i = 0; i < 4; i++) - { - if (aeTempVecType[regBaseIndex + i] != SVT_VOID) - { - componentMask |= (1 << i); - eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]); - } - } - - // Now componentMask contains the components we actually need to update and eType may have been changed to something else. 
- // Write the results - for (i = 0; i < 4; i++) - { - if (componentMask & (1 << i)) - { - if (aeTempVecType[regBaseIndex + i] != eType) - { - aeTempVecType[regBaseIndex + i] = eType; - if (psMadeProgress) - *psMadeProgress = 1; - } - } - } - + int i = 0; + + // Expand the mask to include all components that are used, also upgrade type + for (i = 0; i < 4; i++) + { + if (aeTempVecType[regBaseIndex + i] != SVT_VOID) + { + componentMask |= (1 << i); + eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]); + } + } + + // Now componentMask contains the components we actually need to update and eType may have been changed to something else. + // Write the results + for (i = 0; i < 4; i++) + { + if (componentMask & (1 << i)) + { + if (aeTempVecType[regBaseIndex + i] != eType) + { + aeTempVecType[regBaseIndex + i] = eType; + if (psMadeProgress) + *psMadeProgress = 1; + } + } + } } static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault) { - SHADER_VARIABLE_TYPE eType = eDefault; - switch (prec) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_SINT_16: - eType = SVT_INT16; - break; - case OPERAND_MIN_PRECISION_UINT_16: - eType = SVT_UINT16; - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - eType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - eType = SVT_FLOAT16; - break; - default: - ASSERT(0); // Catch this to see what's going on. - break; - } - return eType; - + SHADER_VARIABLE_TYPE eType = eDefault; + switch (prec) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + eType = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + eType = SVT_UINT16; + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + eType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + eType = SVT_FLOAT16; + break; + default: + ASSERT(0); // Catch this to see what's going on. 
+ break; + } + return eType; } static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector &aeTempVecType) { - if (psOperand->eType == OPERAND_TYPE_TEMP) - { - const uint32_t ui32RegIndex = psOperand->ui32RegisterNumber * 4; - uint32_t mask = psOperand->GetAccessMask(); - // Adjust type based on operand precision - eType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType); - - SetVectorType(aeTempVecType, ui32RegIndex, mask, eType, NULL); - } + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + const uint32_t ui32RegIndex = psOperand->ui32RegisterNumber * 4; + uint32_t mask = psOperand->GetAccessMask(); + // Adjust type based on operand precision + eType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType); + + SetVectorType(aeTempVecType, ui32RegIndex, mask, eType, NULL); + } } static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector &aeTempVecType) { - uint32_t i = 0; - for (i = 0; i < psInst->ui32NumOperands; i++) - { - MarkOperandAs(&psInst->asOperands[i], eType, aeTempVecType); - } + uint32_t i = 0; + for (i = 0; i < psInst->ui32NumOperands; i++) + { + MarkOperandAs(&psInst->asOperands[i], eType, aeTempVecType); + } } // Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again. 
static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand) { - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t rebase = 0; - bool isArray; - - if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER) - return; - - // Ignore selection modes that access more than one component - switch (psOperand->eSelMode) - { - case OPERAND_4_COMPONENT_SELECT_1_MODE: - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - if (!psOperand->IsSwizzleReplicated()) - return; - break; - case OPERAND_4_COMPONENT_MASK_MODE: - return; - } - - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - - if (psVarType->Class == SVC_SCALAR) - psOperand->iNumComponents = 1; - + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = 0; + bool isArray; + + if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER) + return; + + // Ignore selection modes that access more than one component + switch (psOperand->eSelMode) + { + case OPERAND_4_COMPONENT_SELECT_1_MODE: + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + if (!psOperand->IsSwizzleReplicated()) + return; + break; + case OPERAND_4_COMPONENT_MASK_MODE: + return; + } + + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + + if (psVarType->Class == SVC_SCALAR) + psOperand->iNumComponents = 1; } struct SetPartialDataTypes { - SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec) - : m_TempVec(_aeTempVec) - {} - SHADER_VARIABLE_TYPE *m_TempVec; - - template void operator()(ItrType inst, Operand *psOperand, 
uint32_t ui32OperandType) const - { - uint32_t mask = 0; - SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; - SHADER_VARIABLE_TYPE newType; - uint32_t i, reg; - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - if (ui32OperandType == FEO_FLAG_SUBOPERAND) - { - // We really shouldn't ever be getting minprecision float indices here - ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8); - - mask = psOperand->GetAccessMask(); - reg = psOperand->ui32RegisterNumber; - newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS); - for (i = 0; i < 4; i++) - { - if (!(mask & (1 << i))) - continue; - if (aeTempVecType[reg * 4 + i] == SVT_VOID) - aeTempVecType[reg * 4 + i] = newType; - } - return; - - } - - if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) - return; - - mask = psOperand->GetAccessMask(); - reg = psOperand->ui32RegisterNumber; - newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID); - ASSERT(newType != SVT_VOID); - for (i = 0; i < 4; i++) - { - if (!(mask & (1 << i))) - continue; - aeTempVecType[reg * 4 + i] = newType; - } - return; - } + SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec) + : m_TempVec(_aeTempVec) + {} + SHADER_VARIABLE_TYPE *m_TempVec; + + template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const + { + uint32_t mask = 0; + SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; + SHADER_VARIABLE_TYPE newType; + uint32_t i, reg; + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + if (ui32OperandType == FEO_FLAG_SUBOPERAND) + { + // We really shouldn't ever be getting minprecision float indices here + ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8); + + mask = psOperand->GetAccessMask(); + reg = psOperand->ui32RegisterNumber; + newType = 
OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS); + for (i = 0; i < 4; i++) + { + if (!(mask & (1 << i))) + continue; + if (aeTempVecType[reg * 4 + i] == SVT_VOID) + aeTempVecType[reg * 4 + i] = newType; + } + return; + } + + if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) + return; + + mask = psOperand->GetAccessMask(); + reg = psOperand->ui32RegisterNumber; + newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID); + ASSERT(newType != SVT_VOID); + for (i = 0; i < 4; i++) + { + if (!(mask & (1 << i))) + continue; + aeTempVecType[reg * 4 + i] = newType; + } + } }; // Write back the temp datatypes into operands. Also mark scalars in constant buffers struct WritebackDataTypes { - WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec) - : m_Context(_ctx) - , m_TempVec(_aeTempVec) - {} - HLSLCrossCompilerContext *m_Context; - SHADER_VARIABLE_TYPE *m_TempVec; + WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec) + : m_Context(_ctx) + , m_TempVec(_aeTempVec) + {} + HLSLCrossCompilerContext *m_Context; + SHADER_VARIABLE_TYPE *m_TempVec; - template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const - { - SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; - uint32_t reg, mask, i; - SHADER_VARIABLE_TYPE dtype; + template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const + { + SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; + uint32_t reg, mask, i; + SHADER_VARIABLE_TYPE dtype; - if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) - SetCBOperandComponents(m_Context, psOperand); + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + SetCBOperandComponents(m_Context, psOperand); - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; - reg = psOperand->ui32RegisterNumber; - mask = psOperand->GetAccessMask(); - dtype = 
SVT_VOID; + reg = psOperand->ui32RegisterNumber; + mask = psOperand->GetAccessMask(); + dtype = SVT_VOID; - for (i = 0; i < 4; i++) - { - if (!(mask & (1 << i))) - continue; + for (i = 0; i < 4; i++) + { + if (!(mask & (1 << i))) + continue; - // Check that all components have the same type - ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]); + // Check that all components have the same type + ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]); - dtype = aeTempVecType[reg * 4 + i]; + dtype = aeTempVecType[reg * 4 + i]; - ASSERT(dtype != SVT_VOID); - ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype)); - - psOperand->aeDataType[i] = dtype; - } - - return; - } + ASSERT(dtype != SVT_VOID); + ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype)); + psOperand->aeDataType[i] = dtype; + } + } }; void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector & instructions, uint32_t ui32TempCount, std::vector &results) { - uint32_t i; - Instruction *psFirstInst = &instructions[0]; - Instruction *psInst = psFirstInst; - // Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float - std::vector &aeTempVecType = results; - - aeTempVecType.clear(); - aeTempVecType.resize(ui32TempCount * 4, SVT_VOID); - - if (ui32TempCount == 0) - return; - - // Go through the instructions, pick up partial datatypes, because we at least know those for a fact. 
- // Also set all suboperands to be integers (they're always used as indices) - ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0])); - - // if (psContext->psShader->ui32MajorVersion <= 3) - { - // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table - // Only ever to int->float promotion (or int->uint), never the other way around - for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) - { - if (psInst->ui32NumOperands == 0) - continue; + uint32_t i; + Instruction *psFirstInst = &instructions[0]; + Instruction *psInst = psFirstInst; + // Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float + std::vector &aeTempVecType = results; + + aeTempVecType.clear(); + aeTempVecType.resize(ui32TempCount * 4, SVT_VOID); + + if (ui32TempCount == 0) + return; + + // Go through the instructions, pick up partial datatypes, because we at least know those for a fact. 
+ // Also set all suboperands to be integers (they're always used as indices) + ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0])); + + // if (psContext->psShader->ui32MajorVersion <= 3) + { + // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table + // Only ever to int->float promotion (or int->uint), never the other way around + for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) + { + if (psInst->ui32NumOperands == 0) + continue; #ifdef _DEBUG - for (int k = 0; k < (int)psInst->ui32NumOperands; k++) - { - if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP) - { - ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount); - } - } + for (int k = 0; k < (int)psInst->ui32NumOperands; k++) + { + if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP) + { + ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount); + } + } #endif - switch (psInst->eOpcode) - { - // All float-only ops - case OPCODE_ADD: - case OPCODE_DERIV_RTX: - case OPCODE_DERIV_RTY: - case OPCODE_DIV: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_EXP: - case OPCODE_FRC: - case OPCODE_LOG: - case OPCODE_MAD: - case OPCODE_MIN: - case OPCODE_MAX: - case OPCODE_MUL: - case OPCODE_ROUND_NE: - case OPCODE_ROUND_NI: - case OPCODE_ROUND_PI: - case OPCODE_ROUND_Z: - case OPCODE_RSQ: - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_D: - case OPCODE_SAMPLE_B: - case OPCODE_SQRT: - case OPCODE_SINCOS: - case OPCODE_LOD: - case OPCODE_GATHER4: - - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_GATHER4_C: - case OPCODE_GATHER4_PO: - case OPCODE_GATHER4_PO_C: - case OPCODE_RCP: - - MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); - break; - - // Comparison ops, need to enable possibility for going boolean - 
case OPCODE_IEQ: - case OPCODE_INE: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); - break; - - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CALLC: - case OPCODE_CONTINUEC: - case OPCODE_RETC: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - break; - - case OPCODE_ILT: - case OPCODE_IGE: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - break; - - case OPCODE_ULT: - case OPCODE_UGE: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); - break; - - case OPCODE_AND: - case OPCODE_OR: - MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); - break; - - // Integer ops that don't care of signedness - case OPCODE_IADD: - case OPCODE_INEG: - case OPCODE_ISHL: - case OPCODE_NOT: - case OPCODE_XOR: - case OPCODE_BUFINFO: - case OPCODE_COUNTBITS: - case OPCODE_FIRSTBIT_HI: - case OPCODE_FIRSTBIT_LO: - case OPCODE_FIRSTBIT_SHI: - case OPCODE_BFI: - case OPCODE_BFREV: - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - - - MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType); - break; - - - // Integer ops - case OPCODE_IMAD: - case OPCODE_IMAX: - case OPCODE_IMIN: - case 
OPCODE_IMUL: - case OPCODE_ISHR: - case OPCODE_IBFE: - - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType); - break; - - - // uint ops - case OPCODE_UDIV: - case OPCODE_UMUL: - case OPCODE_UMAD: - case OPCODE_UMAX: - case OPCODE_UMIN: - case OPCODE_USHR: - case OPCODE_UADDC: - case OPCODE_USUBB: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_ALLOC: - case OPCODE_IMM_ATOMIC_CONSUME: - MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType); - break; - case OPCODE_UBFE: - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType); - break; - - // Need special handling - case OPCODE_FTOI: - case OPCODE_FTOU: - MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); - break; - - case OPCODE_GE: - case OPCODE_LT: - case OPCODE_EQ: - case OPCODE_NE: - - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType); - break; - - case OPCODE_ITOF: - case OPCODE_UTOF: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? 
SVT_INT : SVT_UINT, aeTempVecType); - break; - - case OPCODE_LD: - case OPCODE_LD_MS: - // TODO: Would need to know the sampler return type - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - break; - - case OPCODE_MOVC: - MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); - case OPCODE_SWAPC: - MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); - break; - - case OPCODE_RESINFO: - // Operand 0 depends on the return type declaration, op 1 is always uint - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - switch (psInst->eResInfoReturnType) - { - default: - case RESINFO_INSTRUCTION_RETURN_FLOAT: - case RESINFO_INSTRUCTION_RETURN_RCPFLOAT: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - break; - case RESINFO_INSTRUCTION_RETURN_UINT: - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - break; - } - - case OPCODE_SAMPLE_INFO: - // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint. - MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType); - break; - - case OPCODE_SAMPLE_POS: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - break; - - - case OPCODE_LD_UAV_TYPED: - // translates to gvec4 loadImage(gimage i, ivec p). 
- MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p - break; - - case OPCODE_STORE_UAV_TYPED: - // translates to storeImage(gimage i, ivec p, gvec4 data) - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data - break; - - case OPCODE_LD_RAW: - if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - else - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - break; - - case OPCODE_STORE_RAW: - if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - else - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - break; - - case OPCODE_LD_STRUCTURED: - MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - break; - - case OPCODE_STORE_STRUCTURED: - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType); - break; - - case OPCODE_F32TOF16: - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); - break; - - case OPCODE_F16TOF32: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - break; - - - - // No-operands, should never get here anyway - /* case OPCODE_BREAK: - case OPCODE_CALL: - case OPCODE_CASE: - case OPCODE_CONTINUE: - case 
OPCODE_CUT: - case OPCODE_DEFAULT: - case OPCODE_DISCARD: - case OPCODE_ELSE: - case OPCODE_EMIT: - case OPCODE_EMITTHENCUT: - case OPCODE_ENDIF: - case OPCODE_ENDLOOP: - case OPCODE_ENDSWITCH: - - case OPCODE_LABEL: - case OPCODE_LOOP: - case OPCODE_CUSTOMDATA: - case OPCODE_NOP: - case OPCODE_RET: - case OPCODE_SWITCH: - case OPCODE_DCL_RESOURCE: // DCL* opcodes have - case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. - case OPCODE_DCL_SAMPLER: - case OPCODE_DCL_INDEX_RANGE: - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - case OPCODE_DCL_INPUT: - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_SIV: - case OPCODE_DCL_INPUT_PS: - case OPCODE_DCL_INPUT_PS_SGV: - case OPCODE_DCL_INPUT_PS_SIV: - case OPCODE_DCL_OUTPUT: - case OPCODE_DCL_OUTPUT_SGV: - case OPCODE_DCL_OUTPUT_SIV: - case OPCODE_DCL_TEMPS: - case OPCODE_DCL_INDEXABLE_TEMP: - case OPCODE_DCL_GLOBAL_FLAGS: - - - case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader - case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader - case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader - case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader - - case OPCODE_EMIT_STREAM: - case OPCODE_CUT_STREAM: - case OPCODE_EMITTHENCUT_STREAM: - case OPCODE_INTERFACE_CALL: - - - case OPCODE_DCL_STREAM: - case OPCODE_DCL_FUNCTION_BODY: - case OPCODE_DCL_FUNCTION_TABLE: - case OPCODE_DCL_INTERFACE: - - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - case OPCODE_DCL_TESS_DOMAIN: - case OPCODE_DCL_TESS_PARTITIONING: - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - case OPCODE_DCL_HS_MAX_TESSFACTOR: - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - - case OPCODE_DCL_THREAD_GROUP: - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - case 
OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - case OPCODE_DCL_RESOURCE_RAW: - case OPCODE_DCL_RESOURCE_STRUCTURED: - case OPCODE_SYNC: - - // TODO - case OPCODE_DADD: - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - - case OPCODE_EVAL_SNAPPED: - case OPCODE_EVAL_SAMPLE_INDEX: - case OPCODE_EVAL_CENTROID: - - case OPCODE_DCL_GS_INSTANCE_COUNT: - - case OPCODE_ABORT: - case OPCODE_DEBUG_BREAK:*/ - - default: - break; - } - } - } - - { - int madeProgress = 0; - // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have - do - { - madeProgress = 0; - psInst = psFirstInst; - for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) - { - if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC) - { - // Figure out the data type - uint32_t k; - SHADER_VARIABLE_TYPE dataType = SVT_VOID; - int foundImmediate = 0; - for (k = 0; k < psInst->ui32NumOperands; k++) - { - uint32_t mask, j; - if (psInst->eOpcode == OPCODE_MOVC && k == 1) - continue; // Ignore the condition operand, it's always int - - if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32) - { - foundImmediate = 1; - continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed - } - - if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) - { - dataType = psInst->asOperands[k].GetDataType(psContext); - break; - } - - if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE) - { - // If any modifiers are used in MOV or MOVC, that automatically is treated as float. 
- dataType = SVT_FLOAT; - break; - } - - mask = psInst->asOperands[k].GetAccessMask(); - for (j = 0; j < 4; j++) - { - if (!(mask & (1 << j))) - continue; - if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID) - { - dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]); - } - } - } - - // Use at minimum int type when any operand is immediate. - // Allowing bool could lead into bugs like case 883080 - if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL)) - dataType = SVT_INT; - - if (dataType != SVT_VOID) - { - // Found data type, write to all operands - // First adjust it to not have precision qualifiers in it - switch (dataType) - { - case SVT_FLOAT10: - case SVT_FLOAT16: - dataType = SVT_FLOAT; - break; - case SVT_INT12: - case SVT_INT16: - dataType = SVT_INT; - break; - case SVT_UINT16: - case SVT_UINT8: - dataType = SVT_UINT; - break; - default: - break; - } - for (k = 0; k < psInst->ui32NumOperands; k++) - { - uint32_t mask; - if (psInst->eOpcode == OPCODE_MOVC && k == 1) - continue; // Ignore the condition operand, it's always int - - if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) - continue; - if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) - continue; - - mask = psInst->asOperands[k].GetAccessMask(); - SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress); - - } - - } - } - } - } while (madeProgress != 0); - } - - - // translate forced_int and int_ambiguous back to int - for (i = 0; i < ui32TempCount * 4; i++) - { - if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS) - aeTempVecType[i] = SVT_INT; - } - - ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0])); - - // Propagate boolean data types over logical operators - bool didProgress = false; - do - { - didProgress = false; - 
std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i) - { - if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR) - && (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL) - && (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL)) - { - // Check if all uses see only this define - bool isStandalone = true; - std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u) - { - if (u.m_Op->m_Defines.size() > 1) - isStandalone = false; - }); - - if (isStandalone) - { - didProgress = true; - // Change data type of this and all uses - i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL; - uint32_t reg = i.asOperands[0].ui32RegisterNumber; - aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL; - - std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u) - { - u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL; - }); - } - } - }); - } while (didProgress); - + switch (psInst->eOpcode) + { + // All float-only ops + case OPCODE_ADD: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTY: + case OPCODE_DIV: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_EXP: + case OPCODE_FRC: + case OPCODE_LOG: + case OPCODE_MAD: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_MUL: + case OPCODE_ROUND_NE: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_Z: + case OPCODE_RSQ: + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_SAMPLE_B: + case OPCODE_SQRT: + case OPCODE_SINCOS: + case OPCODE_LOD: + case OPCODE_GATHER4: + + case OPCODE_DERIV_RTX_COARSE: + case 
OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_GATHER4_C: + case OPCODE_GATHER4_PO: + case OPCODE_GATHER4_PO_C: + case OPCODE_RCP: + + MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); + break; + + // Comparison ops, need to enable possibility for going boolean + case OPCODE_IEQ: + case OPCODE_INE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); + break; + + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CALLC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_ILT: + case OPCODE_IGE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + break; + + case OPCODE_ULT: + case OPCODE_UGE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); + break; + + case OPCODE_AND: + case OPCODE_OR: + MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + + // Integer ops that don't care of signedness + case OPCODE_IADD: + case OPCODE_INEG: + case OPCODE_ISHL: + case OPCODE_NOT: + case OPCODE_XOR: + case OPCODE_BUFINFO: + case OPCODE_COUNTBITS: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_BFI: + case OPCODE_BFREV: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_IADD: + 
case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + + + MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType); + break; + + + // Integer ops + case OPCODE_IMAD: + case OPCODE_IMAX: + case OPCODE_IMIN: + case OPCODE_IMUL: + case OPCODE_ISHR: + case OPCODE_IBFE: + + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType); + break; + + + // uint ops + case OPCODE_UDIV: + case OPCODE_UMUL: + case OPCODE_UMAD: + case OPCODE_UMAX: + case OPCODE_UMIN: + case OPCODE_USHR: + case OPCODE_UADDC: + case OPCODE_USUBB: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType); + break; + case OPCODE_UBFE: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType); + break; + + // Need special handling + case OPCODE_FTOI: + case OPCODE_FTOU: + MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_GE: + case OPCODE_LT: + case OPCODE_EQ: + case OPCODE_NE: + + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_ITOF: + case OPCODE_UTOF: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? 
SVT_INT : SVT_UINT, aeTempVecType); + break; + + case OPCODE_LD: + case OPCODE_LD_MS: + // TODO: Would need to know the sampler return type + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + break; + + case OPCODE_MOVC: + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + case OPCODE_SWAPC: + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_RESINFO: + // Operand 0 depends on the return type declaration, op 1 is always uint + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + switch (psInst->eResInfoReturnType) + { + default: + case RESINFO_INSTRUCTION_RETURN_FLOAT: + case RESINFO_INSTRUCTION_RETURN_RCPFLOAT: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + case RESINFO_INSTRUCTION_RETURN_UINT: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + break; + } + + case OPCODE_SAMPLE_INFO: + // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint. + MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType); + break; + + case OPCODE_SAMPLE_POS: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + + + case OPCODE_LD_UAV_TYPED: + // translates to gvec4 loadImage(gimage i, ivec p). 
+ MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p + break; + + case OPCODE_STORE_UAV_TYPED: + // translates to storeImage(gimage i, ivec p, gvec4 data) + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data + break; + + case OPCODE_LD_RAW: + if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + else + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + break; + + case OPCODE_STORE_RAW: + if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + else + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + break; + + case OPCODE_LD_STRUCTURED: + MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + break; + + case OPCODE_STORE_STRUCTURED: + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType); + break; + + case OPCODE_F32TOF16: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_F16TOF32: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + break; + + + // No-operands, should never get here anyway + /* case OPCODE_BREAK: + case OPCODE_CALL: + case OPCODE_CASE: + case OPCODE_CONTINUE: + case 
OPCODE_CUT: + case OPCODE_DEFAULT: + case OPCODE_DISCARD: + case OPCODE_ELSE: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSWITCH: + + case OPCODE_LABEL: + case OPCODE_LOOP: + case OPCODE_CUSTOMDATA: + case OPCODE_NOP: + case OPCODE_RET: + case OPCODE_SWITCH: + case OPCODE_DCL_RESOURCE: // DCL* opcodes have + case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. + case OPCODE_DCL_SAMPLER: + case OPCODE_DCL_INDEX_RANGE: + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + case OPCODE_DCL_INPUT: + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_SIV: + case OPCODE_DCL_INPUT_PS: + case OPCODE_DCL_INPUT_PS_SGV: + case OPCODE_DCL_INPUT_PS_SIV: + case OPCODE_DCL_OUTPUT: + case OPCODE_DCL_OUTPUT_SGV: + case OPCODE_DCL_OUTPUT_SIV: + case OPCODE_DCL_TEMPS: + case OPCODE_DCL_INDEXABLE_TEMP: + case OPCODE_DCL_GLOBAL_FLAGS: + + + case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader + case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader + case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader + case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader + + case OPCODE_EMIT_STREAM: + case OPCODE_CUT_STREAM: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_INTERFACE_CALL: + + + case OPCODE_DCL_STREAM: + case OPCODE_DCL_FUNCTION_BODY: + case OPCODE_DCL_FUNCTION_TABLE: + case OPCODE_DCL_INTERFACE: + + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + case OPCODE_DCL_TESS_DOMAIN: + case OPCODE_DCL_TESS_PARTITIONING: + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + case OPCODE_DCL_HS_MAX_TESSFACTOR: + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + + case OPCODE_DCL_THREAD_GROUP: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + case 
OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + case OPCODE_DCL_RESOURCE_RAW: + case OPCODE_DCL_RESOURCE_STRUCTURED: + case OPCODE_SYNC: + + // TODO + case OPCODE_DADD: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + + case OPCODE_EVAL_SNAPPED: + case OPCODE_EVAL_SAMPLE_INDEX: + case OPCODE_EVAL_CENTROID: + + case OPCODE_DCL_GS_INSTANCE_COUNT: + + case OPCODE_ABORT: + case OPCODE_DEBUG_BREAK:*/ + + default: + break; + } + } + } + + { + int madeProgress = 0; + // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have + do + { + madeProgress = 0; + psInst = psFirstInst; + for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) + { + if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC) + { + // Figure out the data type + uint32_t k; + SHADER_VARIABLE_TYPE dataType = SVT_VOID; + int foundImmediate = 0; + for (k = 0; k < psInst->ui32NumOperands; k++) + { + uint32_t mask, j; + if (psInst->eOpcode == OPCODE_MOVC && k == 1) + continue; // Ignore the condition operand, it's always int + + if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32) + { + foundImmediate = 1; + continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed + } + + if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) + { + dataType = psInst->asOperands[k].GetDataType(psContext); + break; + } + + if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE) + { + // If any modifiers are used in MOV or MOVC, that automatically is treated as float. 
+ dataType = SVT_FLOAT; + break; + } + + mask = psInst->asOperands[k].GetAccessMask(); + for (j = 0; j < 4; j++) + { + if (!(mask & (1 << j))) + continue; + if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID) + { + dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]); + } + } + } + + // Use at minimum int type when any operand is immediate. + // Allowing bool could lead into bugs like case 883080 + if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL)) + dataType = SVT_INT; + + if (dataType != SVT_VOID) + { + // Found data type, write to all operands + // First adjust it to not have precision qualifiers in it + switch (dataType) + { + case SVT_FLOAT10: + case SVT_FLOAT16: + dataType = SVT_FLOAT; + break; + case SVT_INT12: + case SVT_INT16: + dataType = SVT_INT; + break; + case SVT_UINT16: + case SVT_UINT8: + dataType = SVT_UINT; + break; + default: + break; + } + for (k = 0; k < psInst->ui32NumOperands; k++) + { + uint32_t mask; + if (psInst->eOpcode == OPCODE_MOVC && k == 1) + continue; // Ignore the condition operand, it's always int + + if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) + continue; + if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + continue; + + mask = psInst->asOperands[k].GetAccessMask(); + SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress); + } + } + } + } + } + while (madeProgress != 0); + } + + + // translate forced_int and int_ambiguous back to int + for (i = 0; i < ui32TempCount * 4; i++) + { + if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS) + aeTempVecType[i] = SVT_INT; + } + + ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0])); + + // Propagate boolean data types over logical operators + bool didProgress = false; + do + { + didProgress = false; + 
std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i) + { + if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR) + && (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL) + && (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL)) + { + // Check if all uses see only this define + bool isStandalone = true; + std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u) + { + if (u.m_Op->m_Defines.size() > 1) + isStandalone = false; + }); + + if (isStandalone) + { + didProgress = true; + // Change data type of this and all uses + i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL; + uint32_t reg = i.asOperands[0].ui32RegisterNumber; + aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL; + + std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u) + { + u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL; + }); + } + } + }); + } + while (didProgress); } diff --git a/src/Declaration.cpp b/src/Declaration.cpp index b9b4d42..4171fb3 100644 --- a/src/Declaration.cpp +++ b/src/Declaration.cpp @@ -1,2 +1 @@ - -#include "internal_includes/Declaration.h" \ No newline at end of file +#include "internal_includes/Declaration.h" diff --git a/src/HLSLCrossCompilerContext.cpp b/src/HLSLCrossCompilerContext.cpp index 77c29fa..f3be7ff 100644 --- a/src/HLSLCrossCompilerContext.cpp +++ b/src/HLSLCrossCompilerContext.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/HLSLCrossCompilerContext.h" #include "internal_includes/HLSLccToolkit.h" #include "internal_includes/Shader.h" @@ -13,287 +12,289 @@ void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) { - size_t 
ui32DeclCount = psPhase->psDecl.size(); - uint32_t i; + size_t ui32DeclCount = psPhase->psDecl.size(); + uint32_t i; + + psPhase->psTempDeclaration = NULL; + psPhase->ui32OrigTemps = 0; + psPhase->ui32TotalTemps = 0; - psPhase->psTempDeclaration = NULL; - psPhase->ui32OrigTemps = 0; - psPhase->ui32TotalTemps = 0; + // Retrieve the temp decl count + for (i = 0; i < ui32DeclCount; ++i) + { + if (psPhase->psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + psPhase->ui32TotalTemps = psPhase->psDecl[i].value.ui32NumTemps; + psPhase->psTempDeclaration = &psPhase->psDecl[i]; + break; + } + } - // Retrieve the temp decl count - for (i = 0; i < ui32DeclCount; ++i) - { - if (psPhase->psDecl[i].eOpcode == OPCODE_DCL_TEMPS) - { - psPhase->ui32TotalTemps = psPhase->psDecl[i].value.ui32NumTemps; - psPhase->psTempDeclaration = &psPhase->psDecl[i]; - break; - } - } + if (psPhase->ui32TotalTemps == 0) + return; - if (psPhase->ui32TotalTemps == 0) - return; + psPhase->ui32OrigTemps = psPhase->ui32TotalTemps; - psPhase->ui32OrigTemps = psPhase->ui32TotalTemps; + // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff + // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count + psPhase->pui32SplitInfo.clear(); + psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff); - // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff - // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count - psPhase->pui32SplitInfo.clear(); - psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff); + // Build use-define chains and split temps based on those. + { + DefineUseChains duChains; + UseDefineChains udChains; - // Build use-define chains and split temps based on those. 
- { - DefineUseChains duChains; - UseDefineChains udChains; + BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG()); - BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG()); + CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps); - CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps); + // Only do sampler precision downgrade on pixel shaders. + if (psShader->eShaderType == PIXEL_SHADER) + UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps); - // Only do sampler precision downgrade on pixel shaders. - if (psShader->eShaderType == PIXEL_SHADER) - UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps); + UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo); - UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo); + WriteBackUsesAndDefines(duChains); + } - WriteBackUsesAndDefines(duChains); - } - - HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes); + HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes); - if (psPhase->psTempDeclaration && (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps)) - psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps; + if (psPhase->psTempDeclaration && (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps)) + psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps; } void HLSLCrossCompilerContext::ClearDependencyData() { - - switch (psShader->eShaderType) - { - case PIXEL_SHADER: - { - psDependencies->ClearCrossDependencyData(); - break; - } - case HULL_SHADER: - { - psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; - psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; - break; - } - default: - break; - } + switch (psShader->eShaderType) + 
{ + case PIXEL_SHADER: + { + psDependencies->ClearCrossDependencyData(); + break; + } + case HULL_SHADER: + { + psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; + psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; + break; + } + default: + break; + } } void HLSLCrossCompilerContext::AddIndentation() { - int i; - bstring glsl = *currentGLSLString; - for (i = 0; i < indent; ++i) - { - bcatcstr(glsl, " "); - } + int i; + bstring glsl = *currentGLSLString; + for (i = 0; i < indent; ++i) + { + bcatcstr(glsl, " "); + } } bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName) { - if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) - return true; + if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) + return true; - m_EnabledExtensions.insert(extName); - bformata(extensions, "#extension %s : require\n", extName.c_str()); - return false; + m_EnabledExtensions.insert(extName); + bformata(extensions, "#extension %s : require\n", extName.c_str()); + return false; } bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName) { - if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) - return true; - - m_EnabledExtensions.insert(extName); - bformata(extensions, "#ifdef %s\n", extName.c_str()); - bformata(extensions, "#extension %s : enable\n", extName.c_str()); - bcatcstr(extensions, "#endif\n"); - return false; + if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) + return true; + + m_EnabledExtensions.insert(extName); + bformata(extensions, "#ifdef %s\n", extName.c_str()); + bformata(extensions, "#extension %s : enable\n", extName.c_str()); + bcatcstr(extensions, "#endif\n"); + return false; } std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const { - std::ostringstream oss; - const ShaderInfo::InOutSignature* psIn = NULL; - int regSpace = 
psOperand->GetRegisterSpace(this); - - if (iIgnoreRedirect == 0) - { - if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) - || - (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) - { - oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber; - if (piRebase) - *piRebase = 0; - return oss.str(); - } - } - - if (regSpace == 0) - psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); - - if (psIn && piRebase) - *piRebase = psIn->iRebase; - - const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; - std::string res = ""; - - bool skipPrefix = false; - if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect)) - { - if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix) - return inputPrefix + res; - else - return res; - } - - ASSERT(psIn != NULL); - oss << inputPrefix << (regSpace == 1 ? 
patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex; - return oss.str(); + std::ostringstream oss; + const ShaderInfo::InOutSignature* psIn = NULL; + int regSpace = psOperand->GetRegisterSpace(this); + + if (iIgnoreRedirect == 0) + { + if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) + || + (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber; + if (piRebase) + *piRebase = 0; + return oss.str(); + } + } + + if (regSpace == 0) + psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); + + if (psIn && piRebase) + *piRebase = psIn->iRebase; + + const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; + std::string res = ""; + + bool skipPrefix = false; + if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect)) + { + if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix) + return inputPrefix + res; + else + return res; + } + + ASSERT(psIn != NULL); + oss << inputPrefix << (regSpace == 1 ? 
patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex; + return oss.str(); } - std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand, - int* piStream, - uint32_t *puiIgnoreSwizzle, - int *piRebase, - int iIgnoreRedirect) const + int* piStream, + uint32_t *puiIgnoreSwizzle, + int *piRebase, + int iIgnoreRedirect) const { - std::ostringstream oss; - const ShaderInfo::InOutSignature* psOut = NULL; - int regSpace = psOperand->GetRegisterSpace(this); - - if (iIgnoreRedirect == 0) - { - if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) - || (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) - { - oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber; - if (piRebase) - *piRebase = 0; - return oss.str(); - } - } - - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true); - - - if (psOut && piRebase) - *piRebase = psOut->iRebase; - - if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end())) - { - // Need to route through temp output variable - oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second; - if (!psOperand->m_SubOperands[0].get()) - { - oss << "[" << psOperand->ui32RegisterNumber << "]"; - } - if (piRebase) - *piRebase = 0; - return oss.str(); - } - - const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." 
: "patch"; - std::string res = ""; - - if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect)) - { - // clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count - // with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true - // for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases - - if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0)) - return outputPrefix + res; - else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0)) - return patchPrefix + res; - else - return res; - } - ASSERT(psOut != NULL); - - oss << outputPrefix << (regSpace == 1 ? patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex; - return oss.str(); + std::ostringstream oss; + const ShaderInfo::InOutSignature* psOut = NULL; + int regSpace = psOperand->GetRegisterSpace(this); + + if (iIgnoreRedirect == 0) + { + if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) + || (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber; + if (piRebase) + *piRebase = 0; + return oss.str(); + } + } + + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true); + + + if (psOut && piRebase) + *piRebase = psOut->iRebase; + 
+ if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end())) + { + // Need to route through temp output variable + oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second; + if (!psOperand->m_SubOperands[0].get()) + { + oss << "[" << psOperand->ui32RegisterNumber << "]"; + } + if (piRebase) + *piRebase = 0; + return oss.str(); + } + + const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; + std::string res = ""; + + if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect)) + { + // clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count + // with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true + // for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases + + if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0)) + return outputPrefix + res; + else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0)) + return patchPrefix + res; + else + return res; + } + ASSERT(psOut != NULL); + + oss << outputPrefix << (regSpace == 1 ? 
patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex; + return oss.str(); } bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count) { - char compMask = (char)psOperand->ui32CompMask; - int regSpace = psOperand->GetRegisterSpace(this); - uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams - ASSERT(psShader->ui32CurrentVertexOutputStream < 4); - - // First check for various builtins, mostly depth-output ones. - if (psShader->eShaderType == PIXEL_SHADER) - { - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || - psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) - { - return true; - } - - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) - { - // GL doesn't need declaration, Metal does. - return psShader->eTargetLanguage == LANG_METAL; - } - } - - // Needs declaring if any of the components hasn't been already declared - if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0) - { - int offset; - const ShaderInfo::InOutSignature* psSignature = NULL; - - if (psOperand->eSpecialName == NAME_UNDEFINED) - { - // Need to fetch the actual comp mask - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister( - psOperand->ui32RegisterNumber, - psOperand->ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister( - psOperand->ui32RegisterNumber, - psOperand->ui32CompMask, - &psSignature); - - compMask = (char)psSignature->ui32Mask; - } - for (offset = 0; offset < count; offset++) - { - psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask; - } - - if (psSignature && (psSignature->semanticName == "PSIZE") && (psShader->eTargetLanguage != LANG_METAL)) - { - // gl_PointSize, doesn't need declaring. TODO: Metal doesn't have pointsize at all? 
- return false; - } - - return true; - } - - return false; + char compMask = (char)psOperand->ui32CompMask; + int regSpace = psOperand->GetRegisterSpace(this); + uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams + ASSERT(psShader->ui32CurrentVertexOutputStream < 4); + + // First check for various builtins, mostly depth-output ones. + if (psShader->eShaderType == PIXEL_SHADER) + { + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + return true; + } + + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) + { + // GL doesn't need declaration, Metal does. + return psShader->eTargetLanguage == LANG_METAL; + } + } + + // Needs declaring if any of the components hasn't been already declared + if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0) + { + int offset; + const ShaderInfo::InOutSignature* psSignature = NULL; + + if (psOperand->eSpecialName == NAME_UNDEFINED) + { + // Need to fetch the actual comp mask + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + psOperand->ui32RegisterNumber, + psOperand->ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister( + psOperand->ui32RegisterNumber, + psOperand->ui32CompMask, + &psSignature); + + compMask = (char)psSignature->ui32Mask; + } + for (offset = 0; offset < count; offset++) + { + psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask; + } + + if (psSignature && (psSignature->semanticName == "PSIZE") && (psShader->eTargetLanguage != LANG_METAL)) + { + // gl_PointSize, doesn't need declaring. TODO: Metal doesn't have pointsize at all? 
+ return false; + } + + return true; + } + + return false; } bool HLSLCrossCompilerContext::IsVulkan() const { - return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; } +bool HLSLCrossCompilerContext::IsSwitch() const +{ + return (flags & HLSLCC_FLAG_NVN_TARGET) != 0; +} diff --git a/src/HLSLcc.cpp b/src/HLSLcc.cpp index 5b894ff..efcbe9f 100644 --- a/src/HLSLcc.cpp +++ b/src/HLSLcc.cpp @@ -1,4 +1,3 @@ - #include "hlslcc.h" #include @@ -30,192 +29,191 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result) + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result) { - uint32_t* tokens; - char* glslcstr = NULL; - int GLSLShaderType = GL_FRAGMENT_SHADER_ARB; - int success = 0; - uint32_t i; - - tokens = (uint32_t*)shader; - - std::auto_ptr psShader(DecodeDXBC(tokens, flags)); - - if (psShader.get()) - { - HLSLCrossCompilerContext sContext(reflectionCallbacks); - - // Add shader precisions from the list - psShader->sInfo.AddSamplerPrecisions(samplerPrecisions); - - if (psShader->ui32MajorVersion <= 3) - { - flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS; - } - - sContext.psShader = psShader.get(); - sContext.flags = flags; - - // If dependencies == NULL, we'll create a dummy object for it so that there's always something there. 
- std::auto_ptr depPtr(NULL); - if (dependencies == NULL) - { - depPtr.reset(new GLSLCrossDependencyData()); - sContext.psDependencies = depPtr.get(); - } - else - sContext.psDependencies = dependencies; - - for (i = 0; i < psShader->asPhases.size(); ++i) - { - psShader->asPhases[i].hasPostShaderCode = 0; - } - - if (language == LANG_METAL) - { - // Geometry shader is not supported - if (psShader->eShaderType == GEOMETRY_SHADER) - { - result->sourceCode = ""; - return 0; - } - ToMetal translator(&sContext); - if(!translator.Translate()) - { - bdestroy(sContext.glsl); - for (i = 0; i < psShader->asPhases.size(); ++i) - { - bdestroy(psShader->asPhases[i].postShaderCode); - bdestroy(psShader->asPhases[i].earlyMain); - } - - return 0; - } - } - else - { - ToGLSL translator(&sContext); - language = translator.SetLanguage(language); - translator.SetExtensions(extensions); - if (!translator.Translate()) - { - bdestroy(sContext.glsl); - for (i = 0; i < psShader->asPhases.size(); ++i) - { - bdestroy(psShader->asPhases[i].postShaderCode); - bdestroy(psShader->asPhases[i].earlyMain); - } - - return 0; - } - } - - switch (psShader->eShaderType) - { - case VERTEX_SHADER: - { - GLSLShaderType = GL_VERTEX_SHADER_ARB; - break; - } - case GEOMETRY_SHADER: - { - GLSLShaderType = GL_GEOMETRY_SHADER; - break; - } - case DOMAIN_SHADER: - { - GLSLShaderType = GL_TESS_EVALUATION_SHADER; - break; - } - case HULL_SHADER: - { - GLSLShaderType = GL_TESS_CONTROL_SHADER; - break; - } - case COMPUTE_SHADER: - { - GLSLShaderType = GL_COMPUTE_SHADER; - break; - } - default: - { - break; - } - } - - glslcstr = bstr2cstr(sContext.glsl, '\0'); - result->sourceCode = glslcstr; - bcstrfree(glslcstr); - - bdestroy(sContext.glsl); - for (i = 0; i < psShader->asPhases.size(); ++i) - { - bdestroy(psShader->asPhases[i].postShaderCode); - bdestroy(psShader->asPhases[i].earlyMain); - } - - result->reflection = psShader->sInfo; - - result->textureSamplers = psShader->textureSamplers; - - success = 1; - } - - 
shader = 0; - tokens = 0; - - /* Fill in the result struct */ - - result->shaderType = GLSLShaderType; - result->GLSLLanguage = language; - - return success; + uint32_t* tokens; + char* glslcstr = NULL; + int GLSLShaderType = GL_FRAGMENT_SHADER_ARB; + int success = 0; + uint32_t i; + + tokens = (uint32_t*)shader; + + std::auto_ptr psShader(DecodeDXBC(tokens, flags)); + + if (psShader.get()) + { + HLSLCrossCompilerContext sContext(reflectionCallbacks); + + // Add shader precisions from the list + psShader->sInfo.AddSamplerPrecisions(samplerPrecisions); + + if (psShader->ui32MajorVersion <= 3) + { + flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS; + } + + sContext.psShader = psShader.get(); + sContext.flags = flags; + + // If dependencies == NULL, we'll create a dummy object for it so that there's always something there. + std::auto_ptr depPtr(NULL); + if (dependencies == NULL) + { + depPtr.reset(new GLSLCrossDependencyData()); + sContext.psDependencies = depPtr.get(); + } + else + sContext.psDependencies = dependencies; + + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].hasPostShaderCode = 0; + } + + if (language == LANG_METAL) + { + // Geometry shader is not supported + if (psShader->eShaderType == GEOMETRY_SHADER) + { + result->sourceCode = ""; + return 0; + } + ToMetal translator(&sContext); + if (!translator.Translate()) + { + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + return 0; + } + } + else + { + ToGLSL translator(&sContext); + language = translator.SetLanguage(language); + translator.SetExtensions(extensions); + if (!translator.Translate()) + { + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + return 0; + } + } + + switch (psShader->eShaderType) + { + case 
VERTEX_SHADER: + { + GLSLShaderType = GL_VERTEX_SHADER_ARB; + break; + } + case GEOMETRY_SHADER: + { + GLSLShaderType = GL_GEOMETRY_SHADER; + break; + } + case DOMAIN_SHADER: + { + GLSLShaderType = GL_TESS_EVALUATION_SHADER; + break; + } + case HULL_SHADER: + { + GLSLShaderType = GL_TESS_CONTROL_SHADER; + break; + } + case COMPUTE_SHADER: + { + GLSLShaderType = GL_COMPUTE_SHADER; + break; + } + default: + { + break; + } + } + + glslcstr = bstr2cstr(sContext.glsl, '\0'); + result->sourceCode = glslcstr; + bcstrfree(glslcstr); + + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + result->reflection = psShader->sInfo; + + result->textureSamplers = psShader->textureSamplers; + + success = 1; + } + + shader = 0; + tokens = 0; + + /* Fill in the result struct */ + + result->shaderType = GLSLShaderType; + result->GLSLLanguage = language; + + return success; } HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result) + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result) { - FILE* shaderFile; - int length; - size_t readLength; - std::vector shader; - int success = 0; + FILE* shaderFile; + int length; + size_t readLength; + std::vector shader; + int success = 0; - shaderFile = fopen(filename, "rb"); + shaderFile = fopen(filename, "rb"); - if (!shaderFile) - { - return 0; - } + if (!shaderFile) + { + return 0; + } - fseek(shaderFile, 0, SEEK_END); - length = ftell(shaderFile); - fseek(shaderFile, 0, SEEK_SET); + fseek(shaderFile, 0, 
SEEK_END); + length = ftell(shaderFile); + fseek(shaderFile, 0, SEEK_SET); - shader.reserve(length + 1); + shader.resize(length + 1); - readLength = fread(&shader[0], 1, length, shaderFile); + readLength = fread(&shader[0], 1, length, shaderFile); - fclose(shaderFile); - shaderFile = 0; + fclose(shaderFile); + shaderFile = 0; - shader[readLength] = '\0'; + shader[readLength] = '\0'; - success = TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result); + success = TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result); - return success; + return success; } - diff --git a/src/HLSLccToolkit.cpp b/src/HLSLccToolkit.cpp index 9a9a7c7..0d7b849 100644 --- a/src/HLSLccToolkit.cpp +++ b/src/HLSLccToolkit.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/HLSLccToolkit.h" #include "internal_includes/debug.h" #include "internal_includes/toGLSLOperand.h" @@ -11,460 +10,457 @@ namespace HLSLcc { - uint32_t GetNumberBitsSet(uint32_t a) - { - // Calculate number of bits in a - // Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 - // Works only up to 14 bits (we're only using up to 4) - return (a * 0x200040008001ULL & 0x111111111111111ULL) % 0xf; - } - - uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType) - { - if (eType == SVT_FLOAT16) - { - return TO_FLAG_FORCE_HALF; - } - if (eType == SVT_UINT || eType == SVT_UINT16) - { - return TO_FLAG_UNSIGNED_INTEGER; - } - else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12) - { - return TO_FLAG_INTEGER; - } - else if (eType == SVT_BOOL) - { - return TO_FLAG_BOOL; - } - else - { - return TO_FLAG_NONE; - } - } - - SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags) - { - if (typeflags & TO_FLAG_FORCE_HALF) - return SVT_FLOAT16; - if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT)) - return SVT_INT; - if (typeflags & 
(TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT)) - return SVT_UINT; - if (typeflags & TO_FLAG_BOOL) - return SVT_BOOL; - return SVT_FLOAT; - } - - const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision) - { - static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" }; - static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" }; - static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" }; - static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" }; - static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" }; - static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" }; - static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" }; - static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" }; - static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" }; - - ASSERT(components >= 1 && components <= 4); + uint32_t GetNumberBitsSet(uint32_t a) + { + // Calculate number of bits in a + // Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 + // Works only up to 14 bits (we're only using up to 4) + return (a * 0x200040008001ULL & 0x111111111111111ULL) % 0xf; + } + + uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType) + { + if (eType == SVT_FLOAT16) + { + return TO_FLAG_FORCE_HALF; + } + if (eType == SVT_UINT || eType == SVT_UINT16) + { + return TO_FLAG_UNSIGNED_INTEGER; + } + else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12) + { + return TO_FLAG_INTEGER; + } + else if (eType == SVT_BOOL) + { + return TO_FLAG_BOOL; + } + else + { + return TO_FLAG_NONE; + } + } + + 
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags) + { + if (typeflags & TO_FLAG_FORCE_HALF) + return SVT_FLOAT16; + if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT)) + return SVT_INT; + if (typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT)) + return SVT_UINT; + if (typeflags & TO_FLAG_BOOL) + return SVT_BOOL; + return SVT_FLOAT; + } + + const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision) + { + static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" }; + static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" }; + static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" }; + static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" }; + static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" }; + static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" }; + static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" }; + static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" }; + static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" }; + + ASSERT(components >= 1 && components <= 4); bool emitLowp = EmitLowp(context); - switch (eType) - { - case SVT_UINT: - return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components]; - case SVT_UINT16: - return useGLSLPrecision ? uint16Types[components] : uintTypes[components]; - case SVT_INT: - return intTypes[components]; - case SVT_INT16: - return useGLSLPrecision ? int16Types[components] : intTypes[components]; - case SVT_INT12: - return useGLSLPrecision ? (emitLowp ? 
int12Types[components] : int16Types[components]) : intTypes[components]; - case SVT_FLOAT: - return floatTypes[components]; - case SVT_FLOAT16: - return useGLSLPrecision ? float16Types[components] : floatTypes[components]; - case SVT_FLOAT10: - return useGLSLPrecision ? (emitLowp ? float10Types[components] : float16Types[components]) : floatTypes[components]; - case SVT_BOOL: - return boolTypes[components]; - default: - ASSERT(0); - return " "; - } - } - - const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, - const int components) - { - static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" }; - static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" }; - static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" }; - static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" }; - static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" }; - static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" }; - static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" }; - - ASSERT(components >= 1 && components <= 4); - - switch (eType) - { - case SVT_UINT: - return uintTypes[components]; - case SVT_UINT16: - return ushortTypes[components]; - case SVT_INT: - return intTypes[components]; - case SVT_INT16: - case SVT_INT12: - return shortTypes[components]; - case SVT_FLOAT: - return floatTypes[components]; - case SVT_FLOAT16: - case SVT_FLOAT10: - return halfTypes[components]; - case SVT_BOOL: - return boolTypes[components]; - default: - ASSERT(0); - return " "; - } - } - - const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/) - { - if (psContext->psShader->eTargetLanguage == LANG_METAL) - return GetConstructorForTypeMetal(eType, components); - else - return 
GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision); - } - - std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows) - { - std::string result; - std::ostringstream oss; - if (psContext->psShader->eTargetLanguage == LANG_METAL) - { - switch (eBaseType) - { - case SVT_FLOAT: - oss << "float" << columns << "x" << rows; - break; - case SVT_FLOAT16: - case SVT_FLOAT10: - oss << "half" << columns << "x" << rows; - break; - default: - ASSERT(0); - break; - } - } - else - { - switch (eBaseType) - { - case SVT_FLOAT: - oss << "mat" << columns << "x" << rows; - break; - case SVT_FLOAT16: - oss << "mediump mat" << columns << "x" << rows; - break; - case SVT_FLOAT10: - oss << "lowp mat" << columns << "x" << rows; - break; - default: - ASSERT(0); - break; - } - - } - result = oss.str(); - return result; - } - - void AddSwizzleUsingElementCount(bstring dest, uint32_t count) - { - if (count == 4) - return; - if (count) - { - bcatcstr(dest, "."); - bcatcstr(dest, "x"); - count--; - } - if (count) - { - bcatcstr(dest, "y"); - count--; - } - if (count) - { - bcatcstr(dest, "z"); - count--; - } - if (count) - { - bcatcstr(dest, "w"); - count--; - } - } - - // Calculate the bits set in mask - int WriteMaskToComponentCount(uint32_t writeMask) - { - // In HLSL bytecode writemask 0 also means everything - if (writeMask == 0) - return 4; - - return (int)GetNumberBitsSet(writeMask); - } - - uint32_t BuildComponentMaskFromElementCount(int count) - { - // Translate numComponents into bitmask - // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 - return (1 << count) - 1; - } - - // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) - bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src) - { - if (src == dest) - return true; - - if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) && - (src == SVT_FLOAT 
|| src == SVT_FLOAT10 || src == SVT_FLOAT16)) - return true; - - if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) && - (src == SVT_INT || src == SVT_INT12 || src == SVT_INT16)) - return true; - - if ((dest == SVT_UINT || dest == SVT_UINT16) && - (src == SVT_UINT || src == SVT_UINT16)) - return true; - - return false; - } - - uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType) - { - if (eType == RETURN_TYPE_SINT) - { - return TO_FLAG_INTEGER; - } - else if (eType == RETURN_TYPE_UINT) - { - return TO_FLAG_UNSIGNED_INTEGER; - } - else - { - return TO_FLAG_NONE; - } - } - - SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec) - { - if (eType == RETURN_TYPE_SINT) - { - switch (ePrec) - { - default: - return SVT_INT; - case REFLECT_RESOURCE_PRECISION_LOWP: - return SVT_INT12; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return SVT_INT16; - } - } - else if (eType == RETURN_TYPE_UINT) - { - switch (ePrec) - { - default: - return SVT_UINT; - case REFLECT_RESOURCE_PRECISION_LOWP: - return SVT_UINT8; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return SVT_UINT16; - } - } - else - { - switch (ePrec) - { - default: - return SVT_FLOAT; - case REFLECT_RESOURCE_PRECISION_LOWP: - return SVT_FLOAT10; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return SVT_FLOAT16; - } - } - } - - - uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount) - { - return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2); - } - - // Returns true if the operation is commutative - bool IsOperationCommutative(int eOpCode) - { - switch ((OPCODE_TYPE)eOpCode) - { - case OPCODE_DADD: - case OPCODE_IADD: - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_IMUL: - case OPCODE_OR: - case OPCODE_AND: - return true; - default: - return false; - }; - } - - // Returns true if operands are identical, only cares about temp registers currently. 
- bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB) - { - if (!psA || !psB) - return 0; - - if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP) - return 0; - - if (psA->eModifier != psB->eModifier) - return 0; - - if (psA->iNumComponents != psB->iNumComponents) - return 0; - - if (psA->ui32RegisterNumber != psB->ui32RegisterNumber) - return 0; - - if (psA->eSelMode != psB->eSelMode) - return 0; - - if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask) - return 0; - - if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0]) - return 0; - - if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0])) - return 0; - - return 1; - } - - bool IsAddOneInstruction(const Instruction *psInst) - { - if (psInst->eOpcode != OPCODE_IADD) - return false; - if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP) - return false; - - if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP) - { - if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) - return false; - if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32) - return false; - - if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1) - return false; - } - else - { - if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32) - return false; - if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP) - return false; - - if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) - return false; - - if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1) - return false; - } - return true; - } - - - int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim) - { - switch ((RESOURCE_DIMENSION)eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - return 1; - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - case 
RESOURCE_DIMENSION_TEXTURE1DARRAY: - case RESOURCE_DIMENSION_TEXTURECUBE: - return 2; - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - return 3; - default: - ASSERT(0); - break; - } - return 0; - } - - // Returns the "more important" type of a and b, currently int < uint < float - SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b) - { + switch (eType) + { + case SVT_UINT: + return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components]; + case SVT_UINT16: + return useGLSLPrecision ? uint16Types[components] : uintTypes[components]; + case SVT_INT: + return intTypes[components]; + case SVT_INT16: + return useGLSLPrecision ? int16Types[components] : intTypes[components]; + case SVT_INT12: + return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components]; + case SVT_FLOAT: + return floatTypes[components]; + case SVT_FLOAT16: + return useGLSLPrecision ? float16Types[components] : floatTypes[components]; + case SVT_FLOAT10: + return useGLSLPrecision ? (emitLowp ? 
float10Types[components] : float16Types[components]) : floatTypes[components]; + case SVT_BOOL: + return boolTypes[components]; + default: + ASSERT(0); + return " "; + } + } + + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, + const int components) + { + static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" }; + static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" }; + static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" }; + static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" }; + static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" }; + static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" }; + static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" }; + + ASSERT(components >= 1 && components <= 4); + + switch (eType) + { + case SVT_UINT: + return uintTypes[components]; + case SVT_UINT16: + return ushortTypes[components]; + case SVT_INT: + return intTypes[components]; + case SVT_INT16: + case SVT_INT12: + return shortTypes[components]; + case SVT_FLOAT: + return floatTypes[components]; + case SVT_FLOAT16: + case SVT_FLOAT10: + return halfTypes[components]; + case SVT_BOOL: + return boolTypes[components]; + default: + ASSERT(0); + return " "; + } + } + + const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/) + { + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return GetConstructorForTypeMetal(eType, components); + else + return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision); + } + + std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows) + { + std::string result; + std::ostringstream oss; + if 
(psContext->psShader->eTargetLanguage == LANG_METAL) + { + switch (eBaseType) + { + case SVT_FLOAT: + oss << "float" << columns << "x" << rows; + break; + case SVT_FLOAT16: + case SVT_FLOAT10: + oss << "half" << columns << "x" << rows; + break; + default: + ASSERT(0); + break; + } + } + else + { + switch (eBaseType) + { + case SVT_FLOAT: + oss << "mat" << columns << "x" << rows; + break; + case SVT_FLOAT16: + oss << "mediump mat" << columns << "x" << rows; + break; + case SVT_FLOAT10: + oss << "lowp mat" << columns << "x" << rows; + break; + default: + ASSERT(0); + break; + } + } + result = oss.str(); + return result; + } + + void AddSwizzleUsingElementCount(bstring dest, uint32_t count) + { + if (count == 4) + return; + if (count) + { + bcatcstr(dest, "."); + bcatcstr(dest, "x"); + count--; + } + if (count) + { + bcatcstr(dest, "y"); + count--; + } + if (count) + { + bcatcstr(dest, "z"); + count--; + } + if (count) + { + bcatcstr(dest, "w"); + count--; + } + } + + // Calculate the bits set in mask + int WriteMaskToComponentCount(uint32_t writeMask) + { + // In HLSL bytecode writemask 0 also means everything + if (writeMask == 0) + return 4; + + return (int)GetNumberBitsSet(writeMask); + } + + uint32_t BuildComponentMaskFromElementCount(int count) + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + return (1 << count) - 1; + } + + // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) + bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src) + { + if (src == dest) + return true; + + if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) && + (src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16)) + return true; + + if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) && + (src == SVT_INT || src == SVT_INT12 || src == SVT_INT16)) + return true; + + if ((dest == SVT_UINT || dest == SVT_UINT16) && + (src == SVT_UINT || src == 
SVT_UINT16)) + return true; + + return false; + } + + uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType) + { + if (eType == RETURN_TYPE_SINT) + { + return TO_FLAG_INTEGER; + } + else if (eType == RETURN_TYPE_UINT) + { + return TO_FLAG_UNSIGNED_INTEGER; + } + else + { + return TO_FLAG_NONE; + } + } + + SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec) + { + if (eType == RETURN_TYPE_SINT) + { + switch (ePrec) + { + default: + return SVT_INT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_INT12; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_INT16; + } + } + else if (eType == RETURN_TYPE_UINT) + { + switch (ePrec) + { + default: + return SVT_UINT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_UINT8; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_UINT16; + } + } + else + { + switch (ePrec) + { + default: + return SVT_FLOAT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_FLOAT10; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_FLOAT16; + } + } + } + + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount) + { + return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2); + } + + // Returns true if the operation is commutative + bool IsOperationCommutative(int eOpCode) + { + switch ((OPCODE_TYPE)eOpCode) + { + case OPCODE_DADD: + case OPCODE_IADD: + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_IMUL: + case OPCODE_OR: + case OPCODE_AND: + return true; + default: + return false; + } + } + + // Returns true if operands are identical, only cares about temp registers currently. 
+ bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB) + { + if (!psA || !psB) + return 0; + + if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP) + return 0; + + if (psA->eModifier != psB->eModifier) + return 0; + + if (psA->iNumComponents != psB->iNumComponents) + return 0; + + if (psA->ui32RegisterNumber != psB->ui32RegisterNumber) + return 0; + + if (psA->eSelMode != psB->eSelMode) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0]) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0])) + return 0; + + return 1; + } + + bool IsAddOneInstruction(const Instruction *psInst) + { + if (psInst->eOpcode != OPCODE_IADD) + return false; + if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + + if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP) + { + if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) + return false; + if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32) + return false; + + if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1) + return false; + } + else + { + if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32) + return false; + if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP) + return false; + + if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) + return false; + + if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1) + return false; + } + return true; + } + + int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim) + { + switch ((RESOURCE_DIMENSION)eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + return 1; + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case 
RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURECUBE: + return 2; + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + return 3; + default: + ASSERT(0); + break; + } + return 0; + } + + // Returns the "more important" type of a and b, currently int < uint < float + SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b) + { #define DO_CHECK(type) if( a == type || b == type ) return type - // Priority ordering - DO_CHECK(SVT_FLOAT16); - DO_CHECK(SVT_FLOAT10); - DO_CHECK(SVT_UINT16); - DO_CHECK(SVT_UINT8); - DO_CHECK(SVT_INT16); - DO_CHECK(SVT_INT12); - DO_CHECK(SVT_FORCED_INT); - DO_CHECK(SVT_FLOAT); - DO_CHECK(SVT_UINT); - DO_CHECK(SVT_INT); - DO_CHECK(SVT_INT_AMBIGUOUS); + // Priority ordering + DO_CHECK(SVT_FLOAT16); + DO_CHECK(SVT_FLOAT10); + DO_CHECK(SVT_UINT16); + DO_CHECK(SVT_UINT8); + DO_CHECK(SVT_INT16); + DO_CHECK(SVT_INT12); + DO_CHECK(SVT_FORCED_INT); + DO_CHECK(SVT_FLOAT); + DO_CHECK(SVT_UINT); + DO_CHECK(SVT_INT); + DO_CHECK(SVT_INT_AMBIGUOUS); #undef DO_CHECK - // After these just rely on ordering. - return a > b ? 
a : b; - } - - // Returns true if a direct constructor can convert src->dest - bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) - { - // uint<->int<->bool conversions possible - if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) && - (dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16)) - return true; - - // float<->double possible - if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) && - (dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10)) - return true; - - if (context->psShader->eTargetLanguage == LANG_METAL) - { - // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int', types of different size - if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT)) - return true; - } - - return false; - } + // After these just rely on ordering. + return a > b ? 
a : b; + } + + // Returns true if a direct constructor can convert src->dest + bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) + { + // uint<->int<->bool conversions possible + if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) && + (dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16)) + return true; + + // float<->double possible + if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) && + (dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10)) + return true; + + if (context->psShader->eTargetLanguage == LANG_METAL) + { + // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int', types of different size + if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT)) + return true; + } + + return false; + } bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf) { @@ -481,22 +477,57 @@ namespace HLSLcc #endif #endif // #ifndef fpcheck - // Helper function to print floats with full precision - void PrintFloat(bstring b, float f) - { - bstring temp; - int ePos; - int pointPos; + // Helper function to print floats with full precision + void PrintFloat(bstring b, float f) + { + bstring temp; + int ePos; + int pointPos; + + temp = bformat("%.9g", f); + ePos = bstrchrp(temp, 'e', 0); + pointPos = bstrchrp(temp, '.', 0); + + bconcat(b, temp); + bdestroy(temp); - temp = bformat("%.9g", f); - ePos = bstrchrp(temp, 'e', 0); - pointPos = bstrchrp(temp, '.', 0); + if (ePos < 0 && pointPos < 0 && !fpcheck(f)) + bcatcstr(b, ".0"); + } + + bstring GetEarlyMain(HLSLCrossCompilerContext *psContext) + { + bstring *oldString = psContext->currentGLSLString; + bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + int indent = psContext->indent; - 
bconcat(b, temp); - bdestroy(temp); + if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent) + ++psContext->indent; - if (ePos < 0 && pointPos < 0 && !fpcheck(f)) - bcatcstr(b, ".0"); - } -}; + psContext->currentGLSLString = str; + psContext->AddIndentation(); + psContext->currentGLSLString = oldString; + psContext->indent = indent; + return *str; + } + + bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext) + { + bstring *oldString = psContext->currentGLSLString; + bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + int indent = psContext->indent; + + if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent) + ++psContext->indent; + + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + + psContext->currentGLSLString = str; + psContext->AddIndentation(); + psContext->currentGLSLString = oldString; + psContext->indent = indent; + + return *str; + } +} diff --git a/src/HLSLccTypes.natvis b/src/HLSLccTypes.natvis index 6456304..6dd7c23 100644 --- a/src/HLSLccTypes.natvis +++ b/src/HLSLccTypes.natvis @@ -1,10 +1,10 @@ - {{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}} + {{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}} {{ type={eType}, reg={ui32RegisterNumber} }} - \ No newline at end of file + diff --git a/src/Instruction.cpp b/src/Instruction.cpp index 7a001a0..ee384cd 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/Instruction.h" #include "internal_includes/debug.h" #include "include/ShaderInfo.h" @@ -6,353 +5,345 @@ // Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles static Operand *GetSrcSwizzleOperand(Instruction *psInst) { - switch (psInst->eOpcode) - { - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_NOP: - case OPCODE_SWAPC: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - ASSERT(0); - 
return NULL; - - // Normal arithmetics, all srcs have swizzles - case OPCODE_ADD: - case OPCODE_AND: - case OPCODE_DERIV_RTX: - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTY: - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_DIV: - case OPCODE_EQ: - case OPCODE_EXP: - case OPCODE_FRC: - case OPCODE_FTOI: - case OPCODE_FTOU: - case OPCODE_GE: - case OPCODE_IADD: - case OPCODE_IEQ: - case OPCODE_IGE: - case OPCODE_ILT: - case OPCODE_IMAD: - case OPCODE_IMAX: - case OPCODE_IMIN: - case OPCODE_IMUL: - case OPCODE_INE: - case OPCODE_INEG: - case OPCODE_ITOF: - case OPCODE_LOG: - case OPCODE_LT: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - case OPCODE_NE: - case OPCODE_NOT: - case OPCODE_OR: - case OPCODE_ROUND_NE: - case OPCODE_ROUND_NI: - case OPCODE_ROUND_PI: - case OPCODE_ROUND_Z: - case OPCODE_RSQ: - case OPCODE_SINCOS: - case OPCODE_SQRT: - case OPCODE_UDIV: - case OPCODE_UGE: - case OPCODE_ULT: - case OPCODE_UMAD: - case OPCODE_UMAX: - case OPCODE_UMIN: - case OPCODE_UMUL: - case OPCODE_UTOF: - case OPCODE_XOR: - - case OPCODE_BFI: - case OPCODE_BFREV: - case OPCODE_COUNTBITS: - case OPCODE_DADD: - case OPCODE_DDIV: - case OPCODE_DEQ: - case OPCODE_DFMA: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DMOV: - case OPCODE_DNE: - case OPCODE_DRCP: - case OPCODE_DTOF: - case OPCODE_F16TOF32: - case OPCODE_F32TOF16: - case OPCODE_FIRSTBIT_HI: - case OPCODE_FIRSTBIT_LO: - case OPCODE_FIRSTBIT_SHI: - case OPCODE_FTOD: - case OPCODE_IBFE: - case OPCODE_RCP: - case OPCODE_UADDC: - case OPCODE_UBFE: - case OPCODE_USUBB: - case OPCODE_MOVC: - case OPCODE_DMOVC: - return NULL; - - // Special cases: - case OPCODE_GATHER4: - case OPCODE_GATHER4_C: - case OPCODE_LD: - case OPCODE_LD_MS: - case OPCODE_LOD: - case OPCODE_LD_UAV_TYPED: - case OPCODE_LD_RAW: - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_B: - 
case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_D: - case OPCODE_RESINFO: - return &psInst->asOperands[2]; - - case OPCODE_GATHER4_PO: - case OPCODE_GATHER4_PO_C: - case OPCODE_LD_STRUCTURED: - return &psInst->asOperands[3]; - - case OPCODE_SAMPLE_INFO: - return &psInst->asOperands[1]; - - case OPCODE_ISHL: - case OPCODE_ISHR: - case OPCODE_USHR: - // sm4 variant has single component selection on src1 -> only src0 has swizzle - if (psInst->asOperands[2].eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - return &psInst->asOperands[1]; - else // whereas sm5 variant has swizzle also on src1 - return NULL; - - default: - ASSERT(0); - return NULL; - - - } - + switch (psInst->eOpcode) + { + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_NOP: + case OPCODE_SWAPC: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + ASSERT(0); + return NULL; + + // Normal arithmetics, all srcs have swizzles + case OPCODE_ADD: + case OPCODE_AND: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DIV: + case OPCODE_EQ: + case OPCODE_EXP: + case OPCODE_FRC: + case OPCODE_FTOI: + case OPCODE_FTOU: + case OPCODE_GE: + case OPCODE_IADD: + case OPCODE_IEQ: + case OPCODE_IGE: + case OPCODE_ILT: + case OPCODE_IMAD: + case OPCODE_IMAX: + case OPCODE_IMIN: + case OPCODE_IMUL: + case OPCODE_INE: + case OPCODE_INEG: + case OPCODE_ITOF: + case OPCODE_LOG: + case OPCODE_LT: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + case OPCODE_NE: + case OPCODE_NOT: + case OPCODE_OR: + case OPCODE_ROUND_NE: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_Z: + case OPCODE_RSQ: + case OPCODE_SINCOS: + case OPCODE_SQRT: + case OPCODE_UDIV: + case OPCODE_UGE: + case OPCODE_ULT: + case OPCODE_UMAD: + case OPCODE_UMAX: + case OPCODE_UMIN: + case OPCODE_UMUL: + case OPCODE_UTOF: + case OPCODE_XOR: + + 
case OPCODE_BFI: + case OPCODE_BFREV: + case OPCODE_COUNTBITS: + case OPCODE_DADD: + case OPCODE_DDIV: + case OPCODE_DEQ: + case OPCODE_DFMA: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DMOV: + case OPCODE_DNE: + case OPCODE_DRCP: + case OPCODE_DTOF: + case OPCODE_F16TOF32: + case OPCODE_F32TOF16: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_FTOD: + case OPCODE_IBFE: + case OPCODE_RCP: + case OPCODE_UADDC: + case OPCODE_UBFE: + case OPCODE_USUBB: + case OPCODE_MOVC: + case OPCODE_DMOVC: + return NULL; + + // Special cases: + case OPCODE_GATHER4: + case OPCODE_GATHER4_C: + case OPCODE_LD: + case OPCODE_LD_MS: + case OPCODE_LOD: + case OPCODE_LD_UAV_TYPED: + case OPCODE_LD_RAW: + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_RESINFO: + return &psInst->asOperands[2]; + + case OPCODE_GATHER4_PO: + case OPCODE_GATHER4_PO_C: + case OPCODE_LD_STRUCTURED: + return &psInst->asOperands[3]; + + case OPCODE_SAMPLE_INFO: + return &psInst->asOperands[1]; + + case OPCODE_ISHL: + case OPCODE_ISHR: + case OPCODE_USHR: + // sm4 variant has single component selection on src1 -> only src0 has swizzle + if (psInst->asOperands[2].eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + return &psInst->asOperands[1]; + else // whereas sm5 variant has swizzle also on src1 + return NULL; + + default: + ASSERT(0); + return NULL; + } } // Tweak the source operands of an instruction so that the rebased write mask will still work static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase) { - uint32_t i; - switch (psOperand->eSelMode) - { - default: - case OPERAND_4_COMPONENT_MASK_MODE: - ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL); - - // Special case for immediates, they do not have swizzles - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) - { - if 
(psOperand->iNumComponents > 1) - std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]); - return; - } - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - if (psOperand->iNumComponents > 1) - std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]); - return; - } - - // Need to change this to swizzle - psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE; - psOperand->ui32Swizzle = 0; - for (i = 0; i < 4 - rebase; i++) - psOperand->aui32Swizzle[i] = i + rebase; - for (; i < 4; i++) - psOperand->aui32Swizzle[i] = rebase; // The first actual input. - break; - case OPERAND_4_COMPONENT_SELECT_1_MODE: - // Nothing to do - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - for (i = rebase; i < 4; i++) - psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i]; - break; - } + uint32_t i; + switch (psOperand->eSelMode) + { + default: + case OPERAND_4_COMPONENT_MASK_MODE: + ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL); + + // Special case for immediates, they do not have swizzles + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) + { + if (psOperand->iNumComponents > 1) + std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]); + return; + } + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + if (psOperand->iNumComponents > 1) + std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]); + return; + } + + // Need to change this to swizzle + psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE; + psOperand->ui32Swizzle = 0; + for (i = 0; i < 4 - rebase; i++) + psOperand->aui32Swizzle[i] = i + rebase; + for (; i < 4; i++) + psOperand->aui32Swizzle[i] = rebase; // The first actual input. 
+ break; + case OPERAND_4_COMPONENT_SELECT_1_MODE: + // Nothing to do + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + for (i = rebase; i < 4; i++) + psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i]; + break; + } } void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase) { - uint32_t i = 0; - uint32_t accessMask = 0; - int isDestination = 0; - Operand *psSwizzleOperand = NULL; - - if (flags & UD_CHANGE_SUBOPERANDS) - { - for (i = 0; i < MAX_SUB_OPERANDS; i++) - { - if (psOperand->m_SubOperands[i].get()) - ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase); - } - } - - if ((flags & UD_CHANGE_MAIN_OPERAND) == 0) - return; - - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - if (psOperand->ui32RegisterNumber != oldReg) - return; - - accessMask = psOperand->GetAccessMask(); - // If this operation touches other components than the one(s) we're splitting, skip it - if ((accessMask & (~compMask)) != 0) - { - // Verify that we've not messed up in reachability analysis. - // This would mean that we've encountered an instruction that accesses - // a component in multi-component mode and we're supposed to treat it as single-use only. 
- // Now that we track operands we can bring this back - ASSERT((accessMask & compMask) == 0); - return; - } + uint32_t i = 0; + uint32_t accessMask = 0; + int isDestination = 0; + Operand *psSwizzleOperand = NULL; + + if (flags & UD_CHANGE_SUBOPERANDS) + { + for (i = 0; i < MAX_SUB_OPERANDS; i++) + { + if (psOperand->m_SubOperands[i].get()) + ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase); + } + } + + if ((flags & UD_CHANGE_MAIN_OPERAND) == 0) + return; + + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + if (psOperand->ui32RegisterNumber != oldReg) + return; + + accessMask = psOperand->GetAccessMask(); + // If this operation touches other components than the one(s) we're splitting, skip it + if ((accessMask & (~compMask)) != 0) + { + // Verify that we've not messed up in reachability analysis. + // This would mean that we've encountered an instruction that accesses + // a component in multi-component mode and we're supposed to treat it as single-use only. + // Now that we track operands we can bring this back + ASSERT((accessMask & compMask) == 0); + return; + } #if 0 - printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask); + printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask); #endif - psOperand->ui32RegisterNumber = newReg; - - if (rebase == 0) - return; - - // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. 
- switch (psOperand->eSelMode) - { - case OPERAND_4_COMPONENT_MASK_MODE: - { - uint32_t oldMask = psOperand->ui32CompMask; - if (oldMask == 0) - oldMask = OPERAND_4_COMPONENT_MASK_ALL; - - // Check that we're not losing any information - ASSERT((oldMask >> rebase) << rebase == oldMask); - psOperand->ui32CompMask = (oldMask >> rebase); - break; - } - case OPERAND_4_COMPONENT_SELECT_1_MODE: - ASSERT(psOperand->aui32Swizzle[0] >= rebase); - psOperand->aui32Swizzle[0] -= rebase; - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - { - for (i = 0; i < 4; i++) - { - // Note that this rebase is different from the one done for source operands - ASSERT(psOperand->aui32Swizzle[i] >= rebase); - psOperand->aui32Swizzle[i] -= rebase; - } - break; - } - default: - ASSERT(0); - - } - - // Tweak operand datatypes - std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]); - - // If this operand is a destination, we'll need to tweak sources as well - for (i = 0; i < ui32FirstSrc; i++) - { - if (psOperand == &asOperands[i]) - { - isDestination = 1; - break; - } - } - - if (isDestination == 0) - return; - - // Nasty corner case of 2 destinations, not supported if both targets are written - ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL)); - - // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction - switch (eOpcode) - { - // The opcodes that do not need tweaking: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_BUFINFO: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - return; - - default: - psSwizzleOperand = GetSrcSwizzleOperand(this); // Null means tweak all source operands - if (psSwizzleOperand) - { - DoSrcOperandRebase(psSwizzleOperand, rebase); - return; - } - else - { - for (i = ui32FirstSrc; i < ui32NumOperands; i++) - { - DoSrcOperandRebase(&asOperands[i], 
rebase); - } - } - return; - } - + psOperand->ui32RegisterNumber = newReg; + + if (rebase == 0) + return; + + // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. + switch (psOperand->eSelMode) + { + case OPERAND_4_COMPONENT_MASK_MODE: + { + uint32_t oldMask = psOperand->ui32CompMask; + if (oldMask == 0) + oldMask = OPERAND_4_COMPONENT_MASK_ALL; + + // Check that we're not losing any information + ASSERT((oldMask >> rebase) << rebase == oldMask); + psOperand->ui32CompMask = (oldMask >> rebase); + break; + } + case OPERAND_4_COMPONENT_SELECT_1_MODE: + ASSERT(psOperand->aui32Swizzle[0] >= rebase); + psOperand->aui32Swizzle[0] -= rebase; + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + { + for (i = 0; i < 4; i++) + { + // Note that this rebase is different from the one done for source operands + ASSERT(psOperand->aui32Swizzle[i] >= rebase); + psOperand->aui32Swizzle[i] -= rebase; + } + break; + } + default: + ASSERT(0); + } + + // Tweak operand datatypes + std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]); + + // If this operand is a destination, we'll need to tweak sources as well + for (i = 0; i < ui32FirstSrc; i++) + { + if (psOperand == &asOperands[i]) + { + isDestination = 1; + break; + } + } + + if (isDestination == 0) + return; + + // Nasty corner case of 2 destinations, not supported if both targets are written + ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL)); + + // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction + switch (eOpcode) + { + // The opcodes that do not need tweaking: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_BUFINFO: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + return; + + default: + psSwizzleOperand = GetSrcSwizzleOperand(this); // 
Null means tweak all source operands + if (psSwizzleOperand) + { + DoSrcOperandRebase(psSwizzleOperand, rebase); + return; + } + else + { + for (i = ui32FirstSrc; i < ui32NumOperands; i++) + { + DoSrcOperandRebase(&asOperands[i], rebase); + } + } + return; + } } - // Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const { - const Operand *op; - const ResourceBinding *psBinding = NULL; - OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; - switch (eOpcode) - { - default: - return false; - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_B: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_D: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - break; - } - - op = &asOperands[3]; - ASSERT(op->eType == OPERAND_TYPE_SAMPLER); - - info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding); - if (!psBinding) - { - /* Try to look from texture group */ - info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding); - } - - sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? 
psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - - if (sType == OPERAND_MIN_PRECISION_DEFAULT) - return false; - - if (pType) - *pType = sType; - - return true; + const Operand *op; + const ResourceBinding *psBinding = NULL; + OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; + switch (eOpcode) + { + default: + return false; + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + break; + } + + op = &asOperands[3]; + ASSERT(op->eType == OPERAND_TYPE_SAMPLER); + + info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding); + if (!psBinding) + { + /* Try to look from texture group */ + info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding); + } + + sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + if (sType == OPERAND_MIN_PRECISION_DEFAULT) + return false; + + if (pType) + *pType = sType; + + return true; } - - diff --git a/src/LoopTransform.cpp b/src/LoopTransform.cpp index 05c72cf..e3ba6e6 100644 --- a/src/LoopTransform.cpp +++ b/src/LoopTransform.cpp @@ -1,4 +1,3 @@ - #include "src/internal_includes/HLSLCrossCompilerContext.h" #include "src/internal_includes/LoopTransform.h" #include "src/internal_includes/Shader.h" @@ -9,366 +8,363 @@ namespace HLSLcc { - - struct LoopInfo - { - public: - LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {} - - Instruction * m_StartLoop; // OPCODE_LOOP - Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above. - std::vector m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth - bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing. 
- }; - - typedef std::list Loops; - - // Build a loopinfo array of all the loops in this shader phase - void BuildLoopInfo(ShaderPhase &phase, Loops &res) - { - using namespace std; - res.clear(); - - // A stack of loopinfo elements (stored in res) - list loopStack; - - // Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here. - list dummyLIForSwitches; - - for (std::vector::iterator instItr = phase.psInst.begin(); instItr != phase.psInst.end(); instItr++) - { - Instruction *i = &*instItr; - - if (i->eOpcode == OPCODE_LOOP) - { - LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo()); - currLoopInfo->m_StartLoop = i; - loopStack.push_front(currLoopInfo); - } - else if(i->eOpcode == OPCODE_ENDLOOP) - { - ASSERT(!loopStack.empty()); - LoopInfo *li = *loopStack.begin(); - loopStack.pop_front(); - li->m_EndLoop = i; - } - else if (i->eOpcode == OPCODE_SWITCH) - { - // Create a dummy entry into the stack - LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo()); - li->m_IsSwitch = true; - loopStack.push_front(li); - } - else if (i->eOpcode == OPCODE_ENDSWITCH) - { - ASSERT(!loopStack.empty()); - LoopInfo *li = *loopStack.begin(); - loopStack.pop_front(); - ASSERT(li->m_IsSwitch); - } - else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC) - { - // Get the current loopstack head - ASSERT(!loopStack.empty()); - LoopInfo *li = *loopStack.begin(); - // Ignore breaks from switch-cases - if(!li->m_IsSwitch) - { - li->m_ExitPoints.push_back(i); - } - } - } - - } - - // Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp - static bool IsScalarTempComparisonInstruction(const Instruction *i) - { - switch (i->eOpcode) - { - default: - return false; - case OPCODE_IGE: - case OPCODE_ILT: - case OPCODE_IEQ: - case OPCODE_INE: - case OPCODE_UGE: - case OPCODE_ULT: - break; 
- } - - if (i->asOperands[0].eType != OPERAND_TYPE_TEMP) - return false; - - int tempOp = -1; - if (i->asOperands[1].eType == OPERAND_TYPE_TEMP) - tempOp = 1; - else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP) - tempOp = 2; - - // Also reject comparisons where we compare temp.x vs temp.y - if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber) - return false; - - if (tempOp == -1) - return false; - - if (i->asOperands[0].GetNumSwizzleElements() != 1) - return false; - - return true; - } - - // Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX imm32 - static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b) - { - if (a->eOpcode != b->eOpcode) - return false; - ASSERT(a->ui32NumOperands == b->ui32NumOperands); - uint32_t dstReg = 0; - if (a->asOperands[0].eType != OPERAND_TYPE_TEMP) - return false; - dstReg = a->asOperands[0].ui32RegisterNumber; - - for (uint32_t i = 0; i < a->ui32NumOperands; i++) - { - const Operand &aop = a->asOperands[i]; - const Operand &bop = b->asOperands[i]; - if (aop.eType != bop.eType) - return false; - - if (aop.GetAccessMask() != bop.GetAccessMask()) - return false; - - if (aop.GetNumSwizzleElements() != 1) - return false; - - if (aop.eType == OPERAND_TYPE_TEMP) - { - if (aop.ui32RegisterNumber != bop.ui32RegisterNumber) - return false; - if (aop.ui32RegisterNumber != dstReg) - return false; - } - else if (aop.eType == OPERAND_TYPE_IMMEDIATE32) - { - if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0) - return false; - } - } - return true; - } - - // Attempt to transform a single loop into a for-statement - static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li) - { - // In order to transform a loop into a for, the following has 
to hold: - // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC. - // - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above - // Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement. - // Also, the loop induction variable must be standalone (as in, never used as part of a larger vector) - - Instruction *cmpInst = li.m_StartLoop + 1; - - if (!IsScalarTempComparisonInstruction(cmpInst)) - return; - - Instruction *breakInst = li.m_StartLoop + 2; - if (breakInst->eOpcode != OPCODE_BREAKC) - return; - if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP) - return; - if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber) - return; - - // Check that the comparison result isn't used anywhere else - if (cmpInst->m_Uses.size() != 1) - return; - - ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst); - - // Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable - uint32_t inductionVarIdx = 0; - - Instruction *lastInst = li.m_EndLoop - 1; - if (lastInst->eOpcode != OPCODE_IADD) - return; - if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP) - return; - - if (lastInst->asOperands[0].GetNumSwizzleElements() != 1) - return; - - uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber; - // Verify that the induction variable actually matches. 
- if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar) - inductionVarIdx = 1; - else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar) - inductionVarIdx = 2; - else - return; - - // Verify that we also read from the induction variable in the last instruction - if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) || - (lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar))) - return; - - // Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops, - // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex") - // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing. - // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop. - if(psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan()) - { - for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) - { - switch (itr->eOpcode) - { - case OPCODE_LD_RAW: - case OPCODE_LD_STRUCTURED: - case OPCODE_LD_UAV_TYPED: - case OPCODE_STORE_RAW: - case OPCODE_STORE_STRUCTURED: - case OPCODE_STORE_UAV_TYPED: - return; // Nope, can't do a for, not even a partial one. - default: - break; - } - } - } - - // One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst. - // Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called. - // Of course, if all those instructions are identical, then it's fine. 
- // Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well. - - Instruction *initializer = NULL; - std::vector definitionsOutsideRange; - std::vector definitionsInsideRange; - std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def) - { - if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop) - definitionsOutsideRange.push_back(&def); - else - definitionsInsideRange.push_back(&def); - }); - - if (definitionsInsideRange.size() != 1) - { - // All definitions must be identical - for (std::vector::iterator itr = definitionsInsideRange.begin()+1; itr != definitionsInsideRange.end(); itr++) - { - if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst)) - return; - } - } - - ASSERT(definitionsOutsideRange.size() > 0); - if (definitionsOutsideRange.size() == 1) - initializer = definitionsOutsideRange[0]->m_Inst; - - // Initializer must only write to one component - if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1) - initializer = 0; - // Initializer data type must be int or uint - if (initializer) - { - SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext); - if (dataType != SVT_INT && dataType != SVT_UINT) - return; - } - - // Check that the initializer is only used within the range so we can move it to for statement - if (initializer) - { - bool hasUsesOutsideRange = false; - std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u) - { - if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) - hasUsesOutsideRange = true; - }); - // Has outside uses? 
we cannot pull that up to the for statement - if (hasUsesOutsideRange) - initializer = 0; - } - - // Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either - if (initializer) - { - bool cannotDoInitializer = false; - for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++) - { - const Instruction::Use &u = *itr; - if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) - { - cannotDoInitializer = true; - break; - } - // Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var) - if (u.m_Op->GetAccessMask() != 1) - { - cannotDoInitializer = true; - break; - } - } - // Has outside uses? we cannot pull that up to the for statement - if (cannotDoInitializer) - initializer = 0; - } - - - if (initializer) - { - // We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that. - uint32_t newRegister = phase.m_NextFreeTempRegister++; - li.m_StartLoop->m_InductorRegister = newRegister; - std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u) - { - u.m_Op->m_ForLoopInductorName = newRegister; - }); - // Also tweak the destinations for cmpInst, and lastInst - if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) - cmpInst->asOperands[1].m_ForLoopInductorName = newRegister; - else - cmpInst->asOperands[2].m_ForLoopInductorName = newRegister; - - if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) - lastInst->asOperands[1].m_ForLoopInductorName = newRegister; - else - lastInst->asOperands[2].m_ForLoopInductorName = newRegister; - - lastInst->asOperands[0].m_ForLoopInductorName = newRegister; - 
initializer->asOperands[0].m_ForLoopInductorName = newRegister; - } - - // This loop can be transformed to for-loop. Do the necessary magicks. - li.m_StartLoop->m_LoopInductors[0] = initializer; - li.m_StartLoop->m_LoopInductors[1] = cmpInst; - li.m_StartLoop->m_LoopInductors[2] = breakInst; - li.m_StartLoop->m_LoopInductors[3] = lastInst; - - if (initializer) - initializer->m_SkipTranslation = true; - cmpInst->m_SkipTranslation = true; - breakInst->m_SkipTranslation = true; - lastInst->m_SkipTranslation = true; - - } - - void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase) - { - Loops loops; - BuildLoopInfo(phase, loops); - - std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li) - { - // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point - // Also that there's at least 2 instructions in loop body - ASSERT(li.m_StartLoop != 0); - ASSERT(li.m_EndLoop != 0); - ASSERT(li.m_EndLoop > li.m_StartLoop + 2); - ASSERT(!li.m_IsSwitch); - ASSERT(!li.m_ExitPoints.empty()); - AttemptLoopTransform(psContext, phase, li); - }); - } -}; \ No newline at end of file + struct LoopInfo + { + public: + LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {} + + Instruction * m_StartLoop; // OPCODE_LOOP + Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above. + std::vector m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth + bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing. + }; + + typedef std::list Loops; + + // Build a loopinfo array of all the loops in this shader phase + void BuildLoopInfo(ShaderPhase &phase, Loops &res) + { + using namespace std; + res.clear(); + + // A stack of loopinfo elements (stored in res) + list loopStack; + + // Storage for dummy LoopInfo elements to be used for switch-cases. 
We don't want them cluttering the Loops list so store them here. + list dummyLIForSwitches; + + for (std::vector::iterator instItr = phase.psInst.begin(); instItr != phase.psInst.end(); instItr++) + { + Instruction *i = &*instItr; + + if (i->eOpcode == OPCODE_LOOP) + { + LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo()); + currLoopInfo->m_StartLoop = i; + loopStack.push_front(currLoopInfo); + } + else if (i->eOpcode == OPCODE_ENDLOOP) + { + ASSERT(!loopStack.empty()); + LoopInfo *li = *loopStack.begin(); + loopStack.pop_front(); + li->m_EndLoop = i; + } + else if (i->eOpcode == OPCODE_SWITCH) + { + // Create a dummy entry into the stack + LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo()); + li->m_IsSwitch = true; + loopStack.push_front(li); + } + else if (i->eOpcode == OPCODE_ENDSWITCH) + { + ASSERT(!loopStack.empty()); + LoopInfo *li = *loopStack.begin(); + loopStack.pop_front(); + ASSERT(li->m_IsSwitch); + } + else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC) + { + // Get the current loopstack head + ASSERT(!loopStack.empty()); + LoopInfo *li = *loopStack.begin(); + // Ignore breaks from switch-cases + if (!li->m_IsSwitch) + { + li->m_ExitPoints.push_back(i); + } + } + } + } + + // Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp + static bool IsScalarTempComparisonInstruction(const Instruction *i) + { + switch (i->eOpcode) + { + default: + return false; + case OPCODE_IGE: + case OPCODE_ILT: + case OPCODE_IEQ: + case OPCODE_INE: + case OPCODE_UGE: + case OPCODE_ULT: + break; + } + + if (i->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + + int tempOp = -1; + if (i->asOperands[1].eType == OPERAND_TYPE_TEMP) + tempOp = 1; + else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP) + tempOp = 2; + + // Also reject comparisons where we compare temp.x vs temp.y + if (i->asOperands[1].eType == 
OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber) + return false; + + if (tempOp == -1) + return false; + + if (i->asOperands[0].GetNumSwizzleElements() != 1) + return false; + + return true; + } + + // Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX imm32 + static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b) + { + if (a->eOpcode != b->eOpcode) + return false; + ASSERT(a->ui32NumOperands == b->ui32NumOperands); + uint32_t dstReg = 0; + if (a->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + dstReg = a->asOperands[0].ui32RegisterNumber; + + for (uint32_t i = 0; i < a->ui32NumOperands; i++) + { + const Operand &aop = a->asOperands[i]; + const Operand &bop = b->asOperands[i]; + if (aop.eType != bop.eType) + return false; + + if (aop.GetAccessMask() != bop.GetAccessMask()) + return false; + + if (aop.GetNumSwizzleElements() != 1) + return false; + + if (aop.eType == OPERAND_TYPE_TEMP) + { + if (aop.ui32RegisterNumber != bop.ui32RegisterNumber) + return false; + if (aop.ui32RegisterNumber != dstReg) + return false; + } + else if (aop.eType == OPERAND_TYPE_IMMEDIATE32) + { + if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0) + return false; + } + } + return true; + } + + // Attempt to transform a single loop into a for-statement + static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li) + { + // In order to transform a loop into a for, the following has to hold: + // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC. 
+ // - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above + // Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement. + // Also, the loop induction variable must be standalone (as in, never used as part of a larger vector) + + Instruction *cmpInst = li.m_StartLoop + 1; + + if (!IsScalarTempComparisonInstruction(cmpInst)) + return; + + Instruction *breakInst = li.m_StartLoop + 2; + if (breakInst->eOpcode != OPCODE_BREAKC) + return; + if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return; + if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber) + return; + + // Check that the comparison result isn't used anywhere else + if (cmpInst->m_Uses.size() != 1) + return; + + ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst); + + // Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable + uint32_t inductionVarIdx = 0; + + Instruction *lastInst = li.m_EndLoop - 1; + if (lastInst->eOpcode != OPCODE_IADD) + return; + if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return; + + if (lastInst->asOperands[0].GetNumSwizzleElements() != 1) + return; + + uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber; + // Verify that the induction variable actually matches. 
+ if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar) + inductionVarIdx = 1; + else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar) + inductionVarIdx = 2; + else + return; + + // Verify that we also read from the induction variable in the last instruction + if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) || + (lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar))) + return; + + // Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops, + // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex") + // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing. + // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop. + if (psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan()) + { + for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) + { + switch (itr->eOpcode) + { + case OPCODE_LD_RAW: + case OPCODE_LD_STRUCTURED: + case OPCODE_LD_UAV_TYPED: + case OPCODE_STORE_RAW: + case OPCODE_STORE_STRUCTURED: + case OPCODE_STORE_UAV_TYPED: + return; // Nope, can't do a for, not even a partial one. + default: + break; + } + } + } + + // One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst. + // Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called. + // Of course, if all those instructions are identical, then it's fine. 
+ // Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well. + + Instruction *initializer = NULL; + std::vector definitionsOutsideRange; + std::vector definitionsInsideRange; + std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def) + { + if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop) + definitionsOutsideRange.push_back(&def); + else + definitionsInsideRange.push_back(&def); + }); + + if (definitionsInsideRange.size() != 1) + { + // All definitions must be identical + for (std::vector::iterator itr = definitionsInsideRange.begin() + 1; itr != definitionsInsideRange.end(); itr++) + { + if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst)) + return; + } + } + + ASSERT(definitionsOutsideRange.size() > 0); + if (definitionsOutsideRange.size() == 1) + initializer = definitionsOutsideRange[0]->m_Inst; + + // Initializer must only write to one component + if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1) + initializer = 0; + // Initializer data type must be int or uint + if (initializer) + { + SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext); + if (dataType != SVT_INT && dataType != SVT_UINT) + return; + } + + // Check that the initializer is only used within the range so we can move it to for statement + if (initializer) + { + bool hasUsesOutsideRange = false; + std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u) + { + if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) + hasUsesOutsideRange = true; + }); + // Has outside uses? 
we cannot pull that up to the for statement + if (hasUsesOutsideRange) + initializer = 0; + } + + // Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either + if (initializer) + { + bool cannotDoInitializer = false; + for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++) + { + const Instruction::Use &u = *itr; + if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) + { + cannotDoInitializer = true; + break; + } + // Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var) + if (u.m_Op->GetAccessMask() != 1) + { + cannotDoInitializer = true; + break; + } + } + // Has outside uses? we cannot pull that up to the for statement + if (cannotDoInitializer) + initializer = 0; + } + + + if (initializer) + { + // We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that. + uint32_t newRegister = phase.m_NextFreeTempRegister++; + li.m_StartLoop->m_InductorRegister = newRegister; + std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u) + { + u.m_Op->m_ForLoopInductorName = newRegister; + }); + // Also tweak the destinations for cmpInst, and lastInst + if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) + cmpInst->asOperands[1].m_ForLoopInductorName = newRegister; + else + cmpInst->asOperands[2].m_ForLoopInductorName = newRegister; + + if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) + lastInst->asOperands[1].m_ForLoopInductorName = newRegister; + else + lastInst->asOperands[2].m_ForLoopInductorName = newRegister; + + lastInst->asOperands[0].m_ForLoopInductorName = newRegister; + 
initializer->asOperands[0].m_ForLoopInductorName = newRegister; + } + + // This loop can be transformed to for-loop. Do the necessary magicks. + li.m_StartLoop->m_LoopInductors[0] = initializer; + li.m_StartLoop->m_LoopInductors[1] = cmpInst; + li.m_StartLoop->m_LoopInductors[2] = breakInst; + li.m_StartLoop->m_LoopInductors[3] = lastInst; + + if (initializer) + initializer->m_SkipTranslation = true; + cmpInst->m_SkipTranslation = true; + breakInst->m_SkipTranslation = true; + lastInst->m_SkipTranslation = true; + } + + void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase) + { + Loops loops; + BuildLoopInfo(phase, loops); + + std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li) + { + // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point + // Also that there's at least 2 instructions in loop body + ASSERT(li.m_StartLoop != 0); + ASSERT(li.m_EndLoop != 0); + ASSERT(li.m_EndLoop > li.m_StartLoop + 2); + ASSERT(!li.m_IsSwitch); + ASSERT(!li.m_ExitPoints.empty()); + AttemptLoopTransform(psContext, phase, li); + }); + } +} diff --git a/src/Operand.cpp b/src/Operand.cpp index 0502980..49beaac 100644 --- a/src/Operand.cpp +++ b/src/Operand.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/Operand.h" #include "internal_includes/debug.h" #include "internal_includes/HLSLccToolkit.h" @@ -8,587 +7,573 @@ uint32_t Operand::GetAccessMask() const { - int i; - uint32_t accessMask = 0; - // TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now. 
- switch (eSelMode) - { - default: - case OPERAND_4_COMPONENT_MASK_MODE: - // Update access mask - accessMask = ui32CompMask; - if (accessMask == 0) - accessMask = OPERAND_4_COMPONENT_MASK_ALL; - break; - - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - accessMask = 0; - for (i = 0; i < 4; i++) - accessMask |= 1 << (aui32Swizzle[i]); - break; - - case OPERAND_4_COMPONENT_SELECT_1_MODE: - accessMask = 1 << (aui32Swizzle[0]); - break; - - } - ASSERT(accessMask != 0); - return accessMask; + int i; + uint32_t accessMask = 0; + // TODO: Destination writemask can (AND DOES) affect access from sources, but do it conservatively for now. + switch (eSelMode) + { + default: + case OPERAND_4_COMPONENT_MASK_MODE: + // Update access mask + accessMask = ui32CompMask; + if (accessMask == 0) + accessMask = OPERAND_4_COMPONENT_MASK_ALL; + break; + + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + accessMask = 0; + for (i = 0; i < 4; i++) + accessMask |= 1 << (aui32Swizzle[i]); + break; + + case OPERAND_4_COMPONENT_SELECT_1_MODE: + accessMask = 1 << (aui32Swizzle[0]); + break; + } + ASSERT(accessMask != 0); + return accessMask; } int Operand::GetMaxComponent() const { - if (iWriteMaskEnabled && - iNumComponents == 4) - { - //Component Mask - if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W)) - { - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W) - { - return 4; - } - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z) - { - return 3; - } - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y) - { - return 2; - } - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X) - { - return 1; - } - } - } - else - //Component Swizzle - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32Swizzle == NO_SWIZZLE) - return 4; - - uint32_t res = 0; - for (int i = 0; i < 4; i++) - { - res = std::max(aui32Swizzle[i], res); - } - return (int)res + 1; - } - else - if 
(eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - { - return 1; - } - } - - return 4; + if (iWriteMaskEnabled && + iNumComponents == 4) + { + //Component Mask + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W)) + { + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W) + { + return 4; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z) + { + return 3; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y) + { + return 2; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X) + { + return 1; + } + } + } + else + //Component Swizzle + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == NO_SWIZZLE) + return 4; + + uint32_t res = 0; + for (int i = 0; i < 4; i++) + { + res = std::max(aui32Swizzle[i], res); + } + return (int)res + 1; + } + else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + return 1; + } + } + + return 4; } //Single component repeated //e..g .wwww bool Operand::IsSwizzleReplicated() const { - if (iWriteMaskEnabled && - iNumComponents == 4) - { - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32Swizzle == WWWW_SWIZZLE || - ui32Swizzle == ZZZZ_SWIZZLE || - ui32Swizzle == YYYY_SWIZZLE || - ui32Swizzle == XXXX_SWIZZLE) - { - return true; - } - } - } - return false; + if (iWriteMaskEnabled && + iNumComponents == 4) + { + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == WWWW_SWIZZLE || + ui32Swizzle == ZZZZ_SWIZZLE || + ui32Swizzle == YYYY_SWIZZLE || + ui32Swizzle == XXXX_SWIZZLE) + { + return true; + } + } + } + return false; } - // Get the number of elements returned by operand, taking additional component mask into account uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const { - uint32_t count = 0; - - switch (eType) - { - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - 
return 1; // TODO: does mask make any sense here? - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - case OPERAND_TYPE_INPUT_THREAD_ID: - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - // Adjust component count and break to more processing - ((Operand *)this)->iNumComponents = 3; - break; - case OPERAND_TYPE_IMMEDIATE32: - case OPERAND_TYPE_IMMEDIATE64: - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH: - { - // Translate numComponents into bitmask - // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 - uint32_t compMask = (1 << iNumComponents) - 1; - - compMask &= _ui32CompMask; - // Calculate bits left in compMask - return HLSLcc::GetNumberBitsSet(compMask); - } - default: - { - break; - } - } - - if (iWriteMaskEnabled && - iNumComponents != 1) - { - //Component Mask - if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t compMask = ui32CompMask; - if (compMask == 0) - compMask = OPERAND_4_COMPONENT_MASK_ALL; - compMask &= _ui32CompMask; - - if (compMask == OPERAND_4_COMPONENT_MASK_ALL) - return 4; - - if (compMask & OPERAND_4_COMPONENT_MASK_X) - { - count++; - } - if (compMask & OPERAND_4_COMPONENT_MASK_Y) - { - count++; - } - if (compMask & OPERAND_4_COMPONENT_MASK_Z) - { - count++; - } - if (compMask & OPERAND_4_COMPONENT_MASK_W) - { - count++; - } - } - else - //Component Swizzle - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - uint32_t i; - for (i = 0; i < 4; ++i) - { - if ((_ui32CompMask & (1 << i)) == 0) - continue; - - count++; - } - } - else - if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - { - if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X)) - { - count++; - } - else - if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)) - { - count++; - } - else - if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)) - { - count++; - } - else - if (aui32Swizzle[0] == 
OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W)) - { - count++; - } - } - - //Component Select 1 - } - - if (!count) - { - // Translate numComponents into bitmask - // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 - uint32_t compMask = (1 << iNumComponents) - 1; - - compMask &= _ui32CompMask; - // Calculate bits left in compMask - return HLSLcc::GetNumberBitsSet(compMask); - } - - return count; + uint32_t count = 0; + + switch (eType) + { + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + // Adjust component count and break to more processing + ((Operand *)this)->iNumComponents = 3; + break; + case OPERAND_TYPE_IMMEDIATE32: + case OPERAND_TYPE_IMMEDIATE64: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH: + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << iNumComponents) - 1; + + compMask &= _ui32CompMask; + // Calculate bits left in compMask + return HLSLcc::GetNumberBitsSet(compMask); + } + default: + { + break; + } + } + + if (iWriteMaskEnabled && + iNumComponents != 1) + { + //Component Mask + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t compMask = ui32CompMask; + if (compMask == 0) + compMask = OPERAND_4_COMPONENT_MASK_ALL; + compMask &= _ui32CompMask; + + if (compMask == OPERAND_4_COMPONENT_MASK_ALL) + return 4; + + if (compMask & OPERAND_4_COMPONENT_MASK_X) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_Y) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_Z) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_W) + { + count++; + } + } + else + //Component Swizzle + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + uint32_t i; + for (i = 0; i < 4; ++i) + { + if ((_ui32CompMask & (1 << i)) == 0) + continue; + + count++; + } + } + else if (eSelMode == 
OPERAND_4_COMPONENT_SELECT_1_MODE) + { + if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X)) + { + count++; + } + else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)) + { + count++; + } + else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)) + { + count++; + } + else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W)) + { + count++; + } + } + + //Component Select 1 + } + + if (!count) + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << iNumComponents) - 1; + + compMask &= _ui32CompMask; + // Calculate bits left in compMask + return HLSLcc::GetNumberBitsSet(compMask); + } + + return count; } // Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const { - if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER) - return 0; + if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER) + return 0; - if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE) - return 0; + if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE) + return 0; - if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT) - return 0; + if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT) + return 0; - if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT) - return 0; + if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT) + return 0; - return 1; + return 1; } int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const { - return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase); + return 
GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase); } SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const { - // The min precision qualifier overrides all of the stuff below - switch (eMinPrecision) - { - case OPERAND_MIN_PRECISION_FLOAT_16: - return SVT_FLOAT16; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - return SVT_FLOAT10; - case OPERAND_MIN_PRECISION_SINT_16: - return SVT_INT16; - case OPERAND_MIN_PRECISION_UINT_16: - return SVT_UINT16; - default: - break; - } - - switch (eType) - { - case OPERAND_TYPE_TEMP: - { - SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT; - int i = 0; - - if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - { - return aeDataType[aui32Swizzle[0]]; - } - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32Swizzle == (NO_SWIZZLE)) - { - return aeDataType[0]; - } - - return aeDataType[aui32Swizzle[0]]; - } - - if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t mask = ui32CompMask; - if (!mask) - { - mask = OPERAND_4_COMPONENT_MASK_ALL; - } - for (; i < 4; ++i) - { - if (mask & (1 << i)) - { - eCurrentType = aeDataType[i]; - break; - } - } + // The min precision qualifier overrides all of the stuff below + switch (eMinPrecision) + { + case OPERAND_MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + return SVT_FLOAT10; + case OPERAND_MIN_PRECISION_SINT_16: + return SVT_INT16; + case OPERAND_MIN_PRECISION_UINT_16: + return SVT_UINT16; + default: + break; + } + + switch (eType) + { + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT; + int i = 0; + + if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + return aeDataType[aui32Swizzle[0]]; + } + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == (NO_SWIZZLE)) + { + return aeDataType[0]; + } + + return aeDataType[aui32Swizzle[0]]; + 
} + + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask = ui32CompMask; + if (!mask) + { + mask = OPERAND_4_COMPONENT_MASK_ALL; + } + for (; i < 4; ++i) + { + if (mask & (1 << i)) + { + eCurrentType = aeDataType[i]; + break; + } + } #ifdef _DEBUG - //Check if all elements have the same basic type. - for (; i < 4; ++i) - { - if (mask & (1 << i)) - { - if (eCurrentType != aeDataType[i]) - { - ASSERT(0); - } - } - } + //Check if all elements have the same basic type. + for (; i < 4; ++i) + { + if (mask & (1 << i)) + { + if (eCurrentType != aeDataType[i]) + { + ASSERT(0); + } + } + } #endif - return eCurrentType; - } - - ASSERT(0); - - break; - } - case OPERAND_TYPE_OUTPUT: - { - const uint32_t ui32Register = ui32RegisterNumber; - int regSpace = GetRegisterSpace(psContext); - const ShaderInfo::InOutSignature* psOut = NULL; - - if (regSpace == 0) - psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream, - &psOut); - else { - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true); - if (!psOut) - return SVT_FLOAT; - } - - ASSERT(psOut != NULL); - if (psOut->eMinPrec != MIN_PRECISION_DEFAULT) - { - switch (psOut->eMinPrec) - { - default: - ASSERT(0); - break; - case MIN_PRECISION_FLOAT_16: - return SVT_FLOAT16; - case MIN_PRECISION_FLOAT_2_8: - if (psContext->psShader->eTargetLanguage == LANG_METAL) - return SVT_FLOAT16; - else - return SVT_FLOAT10; - case MIN_PRECISION_SINT_16: - return SVT_INT16; - case MIN_PRECISION_UINT_16: - return SVT_UINT16; - } - } - if (psOut->eComponentType == INOUT_COMPONENT_UINT32) - { - return SVT_UINT; - } - else if (psOut->eComponentType == INOUT_COMPONENT_SINT32) - { - return SVT_INT; - } - return SVT_FLOAT; - break; - } - case OPERAND_TYPE_INPUT: - { - const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1]; - int regSpace = GetRegisterSpace(psContext); - const 
ShaderInfo::InOutSignature* psIn = NULL; - - if (regSpace == 0) - { - if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0) - return SVT_FLOAT; // All combined inputs are stored as floats - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(), - &psIn); - } - else - { - if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0) - return SVT_FLOAT; // All combined inputs are stored as floats - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn); - } - - ASSERT(psIn != NULL); - - switch (eSpecialName) - { - //UINT in DX, INT in GL. - case NAME_PRIMITIVE_ID: - case NAME_VERTEX_ID: - case NAME_INSTANCE_ID: - case NAME_RENDER_TARGET_ARRAY_INDEX: - case NAME_VIEWPORT_ARRAY_INDEX: - case NAME_SAMPLE_INDEX: - return SVT_INT; - - case NAME_IS_FRONT_FACE: - return SVT_UINT; - - case NAME_POSITION: - case NAME_CLIP_DISTANCE: - case NAME_CULL_DISTANCE: - return SVT_FLOAT; - - default: - break; - // fall through - } - - if (psIn->eSystemValueType == NAME_IS_FRONT_FACE) - return SVT_UINT; - - if (eSpecialName == NAME_PRIMITIVE_ID || eSpecialName == NAME_VERTEX_ID) - { - return SVT_INT; - } - - //UINT in DX, INT in GL. 
- if (psIn->eSystemValueType == NAME_INSTANCE_ID || - psIn->eSystemValueType == NAME_PRIMITIVE_ID || - psIn->eSystemValueType == NAME_VERTEX_ID || - psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX || - psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX || - psIn->eSystemValueType == NAME_SAMPLE_INDEX - ) - { - return SVT_INT; - } - - if (psIn->eMinPrec != MIN_PRECISION_DEFAULT) - { - switch (psIn->eMinPrec) - { - default: - ASSERT(0); - break; - case MIN_PRECISION_FLOAT_16: - return SVT_FLOAT16; - case MIN_PRECISION_FLOAT_2_8: - if (psContext->psShader->eTargetLanguage == LANG_METAL) - return SVT_FLOAT16; - else - return SVT_FLOAT10; - case MIN_PRECISION_SINT_16: - return SVT_INT16; - case MIN_PRECISION_UINT_16: - return SVT_UINT16; - } - } - - if (psIn->eComponentType == INOUT_COMPONENT_UINT32) - { - return SVT_UINT; - } - else if (psIn->eComponentType == INOUT_COMPONENT_SINT32) - { - return SVT_INT; - } - return SVT_FLOAT; - break; - } - case OPERAND_TYPE_CONSTANT_BUFFER: - { - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t rebase = -1; - bool isArray; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); - if (psCBuf) - { - int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - if (foundVar) - { - return psVarType->Type; - } - } - else - { - // Todo: this isn't correct yet. 
- return SVT_FLOAT; - } - break; - } - case OPERAND_TYPE_IMMEDIATE32: - { - return ePreferredTypeForImmediates; - } - - case OPERAND_TYPE_IMMEDIATE64: - { - return SVT_DOUBLE; - } - - case OPERAND_TYPE_INPUT_THREAD_ID: - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - { - return SVT_UINT; - } - case OPERAND_TYPE_SPECIAL_ADDRESS: - case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_INPUT_PRIMITIVEID: - { - return SVT_INT; - } - case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: - { - return SVT_UINT; - } - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - return SVT_INT; - } - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - { - return SVT_INT; - } - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan - { - return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT; - } - - case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats - default: - { - return SVT_FLOAT; - } - } - - return SVT_FLOAT; + return eCurrentType; + } + + ASSERT(0); + + break; + } + case OPERAND_TYPE_OUTPUT: + { + const uint32_t ui32Register = ui32RegisterNumber; + int regSpace = GetRegisterSpace(psContext); + const ShaderInfo::InOutSignature* psOut = NULL; + + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream, + &psOut); + else + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true); + if (!psOut) + return SVT_FLOAT; + } + + ASSERT(psOut != NULL); + if (psOut->eMinPrec != MIN_PRECISION_DEFAULT) + { + switch (psOut->eMinPrec) + { + default: + ASSERT(0); + break; + case MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case MIN_PRECISION_FLOAT_2_8: + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return SVT_FLOAT16; + else + return 
SVT_FLOAT10; + case MIN_PRECISION_SINT_16: + return SVT_INT16; + case MIN_PRECISION_UINT_16: + return SVT_UINT16; + } + } + if (psOut->eComponentType == INOUT_COMPONENT_UINT32) + { + return SVT_UINT; + } + else if (psOut->eComponentType == INOUT_COMPONENT_SINT32) + { + return SVT_INT; + } + return SVT_FLOAT; + break; + } + case OPERAND_TYPE_INPUT: + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1]; + int regSpace = GetRegisterSpace(psContext); + const ShaderInfo::InOutSignature* psIn = NULL; + + if (regSpace == 0) + { + if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0) + return SVT_FLOAT; // All combined inputs are stored as floats + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(), + &psIn); + } + else + { + if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0) + return SVT_FLOAT; // All combined inputs are stored as floats + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn); + } + + ASSERT(psIn != NULL); + + switch (eSpecialName) + { + //UINT in DX, INT in GL. + case NAME_PRIMITIVE_ID: + case NAME_VERTEX_ID: + case NAME_INSTANCE_ID: + case NAME_RENDER_TARGET_ARRAY_INDEX: + case NAME_VIEWPORT_ARRAY_INDEX: + case NAME_SAMPLE_INDEX: + return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT; + + case NAME_IS_FRONT_FACE: + return SVT_UINT; + + case NAME_POSITION: + case NAME_CLIP_DISTANCE: + case NAME_CULL_DISTANCE: + return SVT_FLOAT; + + default: + break; + // fall through + } + + if (psIn->eSystemValueType == NAME_IS_FRONT_FACE) + return SVT_UINT; + + //UINT in DX, INT in GL. 
+ if (psIn->eSystemValueType == NAME_PRIMITIVE_ID || + psIn->eSystemValueType == NAME_VERTEX_ID || + psIn->eSystemValueType == NAME_INSTANCE_ID || + psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX || + psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX || + psIn->eSystemValueType == NAME_SAMPLE_INDEX) + return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT; + + if (psIn->eMinPrec != MIN_PRECISION_DEFAULT) + { + switch (psIn->eMinPrec) + { + default: + ASSERT(0); + break; + case MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case MIN_PRECISION_FLOAT_2_8: + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return SVT_FLOAT16; + else + return SVT_FLOAT10; + case MIN_PRECISION_SINT_16: + return SVT_INT16; + case MIN_PRECISION_UINT_16: + return SVT_UINT16; + } + } + + if (psIn->eComponentType == INOUT_COMPONENT_UINT32) + { + return SVT_UINT; + } + else if (psIn->eComponentType == INOUT_COMPONENT_SINT32) + { + return SVT_INT; + } + return SVT_FLOAT; + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = -1; + bool isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); + if (psCBuf) + { + int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (foundVar) + { + return psVarType->Type; + } + } + else + { + // Todo: this isn't correct yet. 
+ return SVT_FLOAT; + } + break; + } + case OPERAND_TYPE_IMMEDIATE32: + { + return ePreferredTypeForImmediates; + } + + case OPERAND_TYPE_IMMEDIATE64: + { + return SVT_DOUBLE; + } + + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + { + return SVT_UINT; + } + case OPERAND_TYPE_SPECIAL_ADDRESS: + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + return SVT_INT; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + return SVT_UINT; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + return SVT_INT; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + return SVT_INT; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan + { + return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT; + } + + case OPERAND_TYPE_INDEXABLE_TEMP: // Indexable temps are always floats + default: + { + return SVT_FLOAT; + } + } + + return SVT_FLOAT; } OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec) { - switch (ePrec) - { - default: - case REFLECT_RESOURCE_PRECISION_UNKNOWN: - case REFLECT_RESOURCE_PRECISION_LOWP: - return OPERAND_MIN_PRECISION_FLOAT_2_8; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return OPERAND_MIN_PRECISION_FLOAT_16; - case REFLECT_RESOURCE_PRECISION_HIGHP: - return OPERAND_MIN_PRECISION_DEFAULT; - } + switch (ePrec) + { + default: + case REFLECT_RESOURCE_PRECISION_UNKNOWN: + case REFLECT_RESOURCE_PRECISION_LOWP: + return OPERAND_MIN_PRECISION_FLOAT_2_8; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return OPERAND_MIN_PRECISION_FLOAT_16; + case REFLECT_RESOURCE_PRECISION_HIGHP: + return OPERAND_MIN_PRECISION_DEFAULT; + } } int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const { - const ShaderInfo::InOutSignature *psSig = NULL; - int regSpace 
= GetRegisterSpace(psContext); - - switch (eType) - { - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - return 1; - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - case OPERAND_TYPE_INPUT_THREAD_ID: - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - return 3; - default: - break; - } - - if (regSpace == 0) - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); - - ASSERT(psSig != NULL); - - // TODO: Are there ever any cases where the mask has 'holes'? - return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + const ShaderInfo::InOutSignature *psSig = NULL; + int regSpace = GetRegisterSpace(psContext); + + switch (eType) + { + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + return 1; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + return 3; + default: + break; + } + + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); + + ASSERT(psSig != NULL); + + // TODO: Are there ever any cases where the mask has 'holes'? 
+ return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); } Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const @@ -635,7 +620,7 @@ Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, co { indexVarFound = true; - // Check if the mul dest is not the same temp as the src. Also check that the temp + // Check if the mul dest is not the same temp as the src. Also check that the temp // does not have multiple uses (which could override the value) // -> we can use src straight and no index revert calc is needed if ((psOriginOp->eType == OPERAND_TYPE_INPUT) diff --git a/src/Shader.cpp b/src/Shader.cpp index b6af00a..b59bd73 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/Shader.h" #include "internal_includes/debug.h" #include @@ -8,62 +7,62 @@ uint32_t Shader::GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const { - switch (eType) - { - case SVT_FLOAT: - return psFloatTempSizes[ui32Reg]; - case SVT_FLOAT16: - return psFloat16TempSizes[ui32Reg]; - case SVT_FLOAT10: - return psFloat10TempSizes[ui32Reg]; - case SVT_INT: - return psIntTempSizes[ui32Reg]; - case SVT_INT16: - return psInt16TempSizes[ui32Reg]; - case SVT_INT12: - return psInt12TempSizes[ui32Reg]; - case SVT_UINT: - return psUIntTempSizes[ui32Reg]; - case SVT_UINT16: - return psUInt16TempSizes[ui32Reg]; - case SVT_DOUBLE: - return psDoubleTempSizes[ui32Reg]; - case SVT_BOOL: - return psBoolTempSizes[ui32Reg]; - default: - ASSERT(0); - } - return 0; + switch (eType) + { + case SVT_FLOAT: + return psFloatTempSizes[ui32Reg]; + case SVT_FLOAT16: + return psFloat16TempSizes[ui32Reg]; + case SVT_FLOAT10: + return psFloat10TempSizes[ui32Reg]; + case SVT_INT: + return psIntTempSizes[ui32Reg]; + case SVT_INT16: + return psInt16TempSizes[ui32Reg]; + case SVT_INT12: + return psInt12TempSizes[ui32Reg]; + case SVT_UINT: + return 
psUIntTempSizes[ui32Reg]; + case SVT_UINT16: + return psUInt16TempSizes[ui32Reg]; + case SVT_DOUBLE: + return psDoubleTempSizes[ui32Reg]; + case SVT_BOOL: + return psBoolTempSizes[ui32Reg]; + default: + ASSERT(0); + } + return 0; } void Shader::ConsolidateHullTempVars() { - uint32_t i, phase; - uint32_t numTemps = 0; - for (phase = 0; phase < asPhases.size(); phase++) - { - for (i = 0; i < asPhases[phase].psDecl.size(); i++) - { - if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) - { - if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps) - numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps; - asPhases[phase].psDecl[i].value.ui32NumTemps = 0; - } - } - } - // Now we have the max temps, write it back to the first one we see. - for (phase = 0; phase < asPhases.size(); phase++) - { - for (i = 0; i < asPhases[phase].psDecl.size(); i++) - { - if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) - { - asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps; - return; - } - } - } + uint32_t i, phase; + uint32_t numTemps = 0; + for (phase = 0; phase < asPhases.size(); phase++) + { + for (i = 0; i < asPhases[phase].psDecl.size(); i++) + { + if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps) + numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps; + asPhases[phase].psDecl[i].value.ui32NumTemps = 0; + } + } + } + // Now we have the max temps, write it back to the first one we see. + for (phase = 0; phase < asPhases.size(); phase++) + { + for (i = 0; i < asPhases[phase].psDecl.size(); i++) + { + if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps; + return; + } + } + } } // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. 
@@ -71,39 +70,39 @@ void Shader::ConsolidateHullTempVars() // In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. void Shader::PrepareStructuredBufferBindingSlots() { - uint32_t i; + uint32_t i; - for (i = 0; i < MAX_RESOURCE_BINDINGS; i++) - { - aui32StructuredBufferBindingPoints[i] = i; - } + for (i = 0; i < MAX_RESOURCE_BINDINGS; i++) + { + aui32StructuredBufferBindingPoints[i] = i; + } } // Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list void Shader::ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase) { - uint32_t p; - std::vector &bindingArray = aui32StructuredBufferBindingPoints; - - for (p = 0; p < psPhase->psDecl.size(); ++p) - { - if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || - psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) - { - uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point - uint32_t i; - - // Find uav binding point from the list. Drop search if not found. - for (i = 0; i < MAX_RESOURCE_BINDINGS && bindingArray[i] <= uav; i++) - { - if (bindingArray[i] == uav) // Remove uav binding point from the list by copying array remainder here - { - memcpy(&bindingArray[i], &bindingArray[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i)*sizeof(uint32_t)); - break; - } - } - } - } + uint32_t p; + std::vector &bindingArray = aui32StructuredBufferBindingPoints; + + for (p = 0; p < psPhase->psDecl.size(); ++p) + { + if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || + psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) + { + uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point + uint32_t i; + + // Find uav binding point from the list. Drop search if not found. 
+ for (i = 0; i < MAX_RESOURCE_BINDINGS && bindingArray[i] <= uav; i++) + { + if (bindingArray[i] == uav) // Remove uav binding point from the list by copying array remainder here + { + memcpy(&bindingArray[i], &bindingArray[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t)); + break; + } + } + } + } } // Image (RWTexture in HLSL) declaration op does not provide enough info about the format and accessing. @@ -112,907 +111,895 @@ void Shader::ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase) // Also resolve access flags for other UAVs as well. No component count resolving for them. void ShaderPhase::ResolveUAVProperties() { - Declaration *psFirstDeclaration = &psDecl[0]; - - uint32_t ui32NumDeclarations = (uint32_t)psDecl.size(); - Instruction *psFirstInstruction = &psInst[0]; - uint32_t ui32NumInstructions = (uint32_t)psInst.size(); - - if (ui32NumDeclarations == 0 || ui32NumInstructions == 0) - return; - - Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1; - Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1; - Declaration *psDecl; - - for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++) - { - Instruction *psInst; - uint32_t uavReg; - if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED && - psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED && - psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) - continue; - - uavReg = psDecl->asOperands[0].ui32RegisterNumber; - - for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++) - { - uint32_t opIndex; - uint32_t accessFlags; - uint32_t numComponents; - - switch (psInst->eOpcode) - { - case OPCODE_LD_UAV_TYPED: - opIndex = 2; - accessFlags = ACCESS_FLAG_READ; - numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target - break; - - case OPCODE_STORE_UAV_TYPED: - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); - 
opIndex = 0; - accessFlags = ACCESS_FLAG_WRITE; - numComponents = 0; // store op does not contribute on the component count resolving - break; - - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMIN: - opIndex = 0; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; - numComponents = 1; - break; - - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - opIndex = 1; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; - numComponents = 1; - break; - - // The rest of the ops here are only for buffer UAVs. No need for component count resolving. - case OPCODE_LD_STRUCTURED: - opIndex = 3; - accessFlags = ACCESS_FLAG_READ; - numComponents = 0; - break; - - case OPCODE_STORE_STRUCTURED: - opIndex = 0; - accessFlags = ACCESS_FLAG_WRITE; - numComponents = 0; - break; - - case OPCODE_LD_RAW: - opIndex = 2; - accessFlags = ACCESS_FLAG_READ; - numComponents = 0; - break; - - case OPCODE_STORE_RAW: - opIndex = 0; - accessFlags = ACCESS_FLAG_WRITE; - numComponents = 0; - break; - - case OPCODE_IMM_ATOMIC_ALLOC: - case OPCODE_IMM_ATOMIC_CONSUME: - opIndex = 1; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; - numComponents = 0; - break; - - default: - continue; - } - - // Buffer loads can also happen on non-uav. Skip those. 
- if(psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW) - continue; - - // Check the instruction is operating on the declared uav - if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg) - continue; - - psDecl->sUAV.ui32AccessFlags |= accessFlags; - - // get the max components accessed, but only for typed (texture) UAVs - if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) - { - psDecl->sUAV.ui32NumComponents = numComponents; - } - } - } + Declaration *psFirstDeclaration = &psDecl[0]; + + uint32_t ui32NumDeclarations = (uint32_t)psDecl.size(); + Instruction *psFirstInstruction = &psInst[0]; + uint32_t ui32NumInstructions = (uint32_t)psInst.size(); + + if (ui32NumDeclarations == 0 || ui32NumInstructions == 0) + return; + + Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1; + Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1; + Declaration *psDecl; + + for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++) + { + Instruction *psInst; + uint32_t uavReg; + if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED && + psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED && + psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) + continue; + + uavReg = psDecl->asOperands[0].ui32RegisterNumber; + + for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++) + { + uint32_t opIndex; + uint32_t accessFlags; + uint32_t numComponents; + + switch (psInst->eOpcode) + { + case OPCODE_LD_UAV_TYPED: + opIndex = 2; + accessFlags = ACCESS_FLAG_READ; + numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target + break; + + case OPCODE_STORE_UAV_TYPED: + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; // store op does not contribute on the component 
count resolving + break; + + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMIN: + opIndex = 0; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + numComponents = 1; + break; + + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + opIndex = 1; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + numComponents = 1; + break; + + // The rest of the ops here are only for buffer UAVs. No need for component count resolving. + case OPCODE_LD_STRUCTURED: + opIndex = 3; + accessFlags = ACCESS_FLAG_READ; + numComponents = 0; + break; + + case OPCODE_STORE_STRUCTURED: + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + case OPCODE_LD_RAW: + opIndex = 2; + accessFlags = ACCESS_FLAG_READ; + numComponents = 0; + break; + + case OPCODE_STORE_RAW: + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + opIndex = 1; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + default: + continue; + } + + // Buffer loads can also happen on non-uav. Skip those. 
+ if (psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW) + continue; + + // Check the instruction is operating on the declared uav + if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg) + continue; + + psDecl->sUAV.ui32AccessFlags |= accessFlags; + + // get the max components accessed, but only for typed (texture) UAVs + if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + psDecl->sUAV.ui32NumComponents = numComponents; + } + } + } } - static void GatherOperandAccessMasks(const Operand *psOperand, char *destTable) { - int i; - uint32_t reg; - for (i = 0; i < MAX_SUB_OPERANDS; i++) - { - if (psOperand->m_SubOperands[i].get()) - GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable); - } + int i; + uint32_t reg; + for (i = 0; i < MAX_SUB_OPERANDS; i++) + { + if (psOperand->m_SubOperands[i].get()) + GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable); + } - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; - reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier + reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier - destTable[reg] |= (char)psOperand->GetAccessMask(); + destTable[reg] |= (char)psOperand->GetAccessMask(); } // Coalesce the split temps back based on their original temp register. Keep uint/int/float operations separate static void CoalesceTemps(Shader *psShader, ShaderPhase *psPhase, uint32_t ui32MaxOrigTemps) { - // Just move all operations back to their original registers, but keep the data type assignments. 
- uint32_t i, k; - Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1]; - std::vector opAccessMasks; - - // First move all newly created temps to high enough so they won't overlap with the rebased ones - - Instruction *inst = &psPhase->psInst[0]; - - if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0) - return; - - while (inst <= psLastInstruction) - { - // Update all operands and their suboperands - for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++) - { - for (k = 0; k < inst->ui32NumOperands; k++) - inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0); - } - inst++; - } - - // Prune the original registers, rebase if necessary - opAccessMasks.clear(); - opAccessMasks.resize(psPhase->ui32TotalTemps, 0); - inst = &psPhase->psInst[0]; - while (inst <= psLastInstruction) - { - for (k = 0; k < inst->ui32NumOperands; k++) - GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]); - inst++; - } - - for (i = 0; i < psPhase->ui32TotalTemps; i++) - { - uint32_t rebase, count; - uint32_t newReg = i; - uint32_t origReg = i; - int needsMoving = 0; - SHADER_VARIABLE_TYPE dataType; - - // Figure out rebase and count - rebase = 0; - count = 0; - if (i < psPhase->ui32OrigTemps) - { - // One of the original registers - k = opAccessMasks[i]; - if (k == 0) - continue; - - while ((k & 1) == 0) - { - rebase++; - k = k >> 1; - } - while (k != 0) - { - count++; - k = k >> 1; - } - newReg = i + ui32MaxOrigTemps * rebase; - if (rebase != 0) - needsMoving = 1; - } - else - { - // Newly created split registers, read info from table - // Read the count and rebase from split info table - count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff; - rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff; - origReg = 0x10000 + i; - newReg = (psPhase->pui32SplitInfo[i]) & 0xffff; - while (psPhase->pui32SplitInfo[newReg] != 0xffffffff) - newReg = (psPhase->pui32SplitInfo[newReg]) & 
0xffff; - - // If count is 4, verify that we have both first and last bit set - ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9); - - newReg = newReg + ui32MaxOrigTemps * rebase; - - // Don't rebase again - rebase = 0; - needsMoving = 1; - - } - - if (needsMoving) - { - // printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase); - - // Move directly to correct location - inst = &psPhase->psInst[0]; - while (inst <= psLastInstruction) - { - for (k = 0; k < inst->ui32NumOperands; k++) - inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase); - inst++; - } - } - // Mark the count - dataType = psPhase->peTempTypes[i * 4 + rebase]; - switch (dataType) - { - default: - ASSERT(0); - break; - case SVT_BOOL: - psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count); - break; - case SVT_FLOAT: - psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count); - break; - case SVT_FLOAT16: - psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count); - break; - case SVT_FLOAT10: - psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count); - break; - case SVT_INT: - psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count); - break; - case SVT_INT16: - psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count); - break; - case SVT_INT12: - psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count); - break; - case SVT_UINT: - psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count); - break; - case SVT_UINT16: - psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count); - break; - case SVT_DOUBLE: - psShader->psDoubleTempSizes[newReg] = 
std::max(psShader->psDoubleTempSizes[newReg], (char)count); - break; - } - } - + // Just move all operations back to their original registers, but keep the data type assignments. + uint32_t i, k; + Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1]; + std::vector opAccessMasks; + + // First move all newly created temps to high enough so they won't overlap with the rebased ones + + Instruction *inst = &psPhase->psInst[0]; + + if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0) + return; + + while (inst <= psLastInstruction) + { + // Update all operands and their suboperands + for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++) + { + for (k = 0; k < inst->ui32NumOperands; k++) + inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0); + } + inst++; + } + + // Prune the original registers, rebase if necessary + opAccessMasks.clear(); + opAccessMasks.resize(psPhase->ui32TotalTemps, 0); + inst = &psPhase->psInst[0]; + while (inst <= psLastInstruction) + { + for (k = 0; k < inst->ui32NumOperands; k++) + GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]); + inst++; + } + + for (i = 0; i < psPhase->ui32TotalTemps; i++) + { + uint32_t rebase, count; + uint32_t newReg = i; + uint32_t origReg = i; + int needsMoving = 0; + SHADER_VARIABLE_TYPE dataType; + + // Figure out rebase and count + rebase = 0; + count = 0; + if (i < psPhase->ui32OrigTemps) + { + // One of the original registers + k = opAccessMasks[i]; + if (k == 0) + continue; + + while ((k & 1) == 0) + { + rebase++; + k = k >> 1; + } + while (k != 0) + { + count++; + k = k >> 1; + } + newReg = i + ui32MaxOrigTemps * rebase; + if (rebase != 0) + needsMoving = 1; + } + else + { + // Newly created split registers, read info from table + // Read the count and rebase from split info table + count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff; + rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff; 
+ origReg = 0x10000 + i; + newReg = (psPhase->pui32SplitInfo[i]) & 0xffff; + while (psPhase->pui32SplitInfo[newReg] != 0xffffffff) + newReg = (psPhase->pui32SplitInfo[newReg]) & 0xffff; + + // If count is 4, verify that we have both first and last bit set + ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9); + + newReg = newReg + ui32MaxOrigTemps * rebase; + + // Don't rebase again + rebase = 0; + needsMoving = 1; + } + + if (needsMoving) + { + // printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase); + + // Move directly to correct location + inst = &psPhase->psInst[0]; + while (inst <= psLastInstruction) + { + for (k = 0; k < inst->ui32NumOperands; k++) + inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase); + inst++; + } + } + // Mark the count + dataType = psPhase->peTempTypes[i * 4 + rebase]; + switch (dataType) + { + default: + ASSERT(0); + break; + case SVT_BOOL: + psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count); + break; + case SVT_FLOAT: + psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count); + break; + case SVT_FLOAT16: + psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count); + break; + case SVT_FLOAT10: + psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count); + break; + case SVT_INT: + psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count); + break; + case SVT_INT16: + psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count); + break; + case SVT_INT12: + psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count); + break; + case SVT_UINT: + psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count); + break; + case SVT_UINT16: + 
psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count); + break; + case SVT_DOUBLE: + psShader->psDoubleTempSizes[newReg] = std::max(psShader->psDoubleTempSizes[newReg], (char)count); + break; + } + } } // Mark whether the temp registers are used per each data type. void Shader::PruneTempRegisters() { - uint32_t k; - uint32_t maxOrigTemps = 0; - uint32_t maxTotalTemps = 0; - // First find the total amount of temps - for (k = 0; k < asPhases.size(); k++) - { - ShaderPhase *psPhase = &asPhases[k]; - maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps); - maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps); - } - - if (maxTotalTemps == 0) - return; // splitarrays are nulls, no need to free - - // Allocate and zero-initialize arrays for each temp sizes. *4 is for every possible rebase - psIntTempSizes.clear(); - psIntTempSizes.resize(maxOrigTemps * 4, 0); - psInt12TempSizes.clear(); - psInt12TempSizes.resize(maxOrigTemps * 4, 0); - psInt16TempSizes.clear(); - psInt16TempSizes.resize(maxOrigTemps * 4, 0); - psUIntTempSizes.clear(); - psUIntTempSizes.resize(maxOrigTemps * 4, 0); - psUInt16TempSizes.clear(); - psUInt16TempSizes.resize(maxOrigTemps * 4, 0); - psFloatTempSizes.clear(); - psFloatTempSizes.resize(maxOrigTemps * 4, 0); - psFloat16TempSizes.clear(); - psFloat16TempSizes.resize(maxOrigTemps * 4, 0); - psFloat10TempSizes.clear(); - psFloat10TempSizes.resize(maxOrigTemps * 4, 0); - psDoubleTempSizes.clear(); - psDoubleTempSizes.resize(maxOrigTemps * 4, 0); - psBoolTempSizes.clear(); - psBoolTempSizes.resize(maxOrigTemps * 4, 0); - - for (k = 0; k < asPhases.size(); k++) - { - ShaderPhase *psPhase = &asPhases[k]; - CoalesceTemps(this, psPhase, maxOrigTemps); - if (psPhase->psTempDeclaration) - psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4; - } - + uint32_t k; + uint32_t maxOrigTemps = 0; + uint32_t maxTotalTemps = 0; + // First find the total amount of temps + for (k = 0; k < 
asPhases.size(); k++) + { + ShaderPhase *psPhase = &asPhases[k]; + maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps); + maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps); + } + + if (maxTotalTemps == 0) + return; // splitarrays are nulls, no need to free + + // Allocate and zero-initialize arrays for each temp sizes. *4 is for every possible rebase + psIntTempSizes.clear(); + psIntTempSizes.resize(maxOrigTemps * 4, 0); + psInt12TempSizes.clear(); + psInt12TempSizes.resize(maxOrigTemps * 4, 0); + psInt16TempSizes.clear(); + psInt16TempSizes.resize(maxOrigTemps * 4, 0); + psUIntTempSizes.clear(); + psUIntTempSizes.resize(maxOrigTemps * 4, 0); + psUInt16TempSizes.clear(); + psUInt16TempSizes.resize(maxOrigTemps * 4, 0); + psFloatTempSizes.clear(); + psFloatTempSizes.resize(maxOrigTemps * 4, 0); + psFloat16TempSizes.clear(); + psFloat16TempSizes.resize(maxOrigTemps * 4, 0); + psFloat10TempSizes.clear(); + psFloat10TempSizes.resize(maxOrigTemps * 4, 0); + psDoubleTempSizes.clear(); + psDoubleTempSizes.resize(maxOrigTemps * 4, 0); + psBoolTempSizes.clear(); + psBoolTempSizes.resize(maxOrigTemps * 4, 0); + + for (k = 0; k < asPhases.size(); k++) + { + ShaderPhase *psPhase = &asPhases[k]; + CoalesceTemps(this, psPhase, maxOrigTemps); + if (psPhase->psTempDeclaration) + psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4; + } } static void DoSignatureAnalysis(std::vector &psSignatures, std::vector &outTable) { - // Fill the char, 2 bits per component so that each 2 bits encode the following info: - // 0: unused OR used by the first signature we happened to see - // 1: used by the second signature - // 2: used by the third sig - // 3: used by the fourth sig. - - // The counters for each input/output/patch. 
Start with 8 registers, grow as needed - std::vector counters(8, (unsigned char)0); - outTable.clear(); - outTable.resize(8, (unsigned char)0); - - size_t i; - for (i = 0; i < psSignatures.size(); i++) - { - ShaderInfo::InOutSignature *psSig = &psSignatures[i]; - char currCounter; - char mask; - ASSERT(psSig != NULL); - - // We'll skip SV_Depth and others that put -1 to the register. - if (psSig->ui32Register == 0xffffffffu) - continue; - - // Make sure there's enough room in the table - if (psSig->ui32Register >= counters.size()) - { - counters.resize(psSig->ui32Register * 2, 0); - outTable.resize(psSig->ui32Register * 2, 0); - } - - // Apply counter value to masked items - currCounter = counters[psSig->ui32Register]; - // Duplicate counter bits - currCounter = currCounter | (currCounter << 2) | (currCounter << 4) | (currCounter << 6); - // Widen the mask - mask = (unsigned char)psSig->ui32Mask; - mask = ((mask & 8) << 3) | ((mask & 4) << 2) | ((mask & 2) << 1) | (mask & 1); - mask = mask | (mask << 1); - // Write output - outTable[psSig->ui32Register] |= (currCounter & mask); - // Update counter - counters[psSig->ui32Register]++; - } + // Fill the char, 2 bits per component so that each 2 bits encode the following info: + // 0: unused OR used by the first signature we happened to see + // 1: used by the second signature + // 2: used by the third sig + // 3: used by the fourth sig. + + // The counters for each input/output/patch. Start with 8 registers, grow as needed + std::vector counters(8, (unsigned char)0); + outTable.clear(); + outTable.resize(8, (unsigned char)0); + + size_t i; + for (i = 0; i < psSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psSignatures[i]; + char currCounter; + char mask; + ASSERT(psSig != NULL); + + // We'll skip SV_Depth and others that put -1 to the register. 
+ if (psSig->ui32Register == 0xffffffffu) + continue; + + // Make sure there's enough room in the table + if (psSig->ui32Register >= counters.size()) + { + counters.resize(psSig->ui32Register * 2, 0); + outTable.resize(psSig->ui32Register * 2, 0); + } + + // Apply counter value to masked items + currCounter = counters[psSig->ui32Register]; + // Duplicate counter bits + currCounter = currCounter | (currCounter << 2) | (currCounter << 4) | (currCounter << 6); + // Widen the mask + mask = (unsigned char)psSig->ui32Mask; + mask = ((mask & 8) << 3) | ((mask & 4) << 2) | ((mask & 2) << 1) | (mask & 1); + mask = mask | (mask << 1); + // Write output + outTable[psSig->ui32Register] |= (currCounter & mask); + // Update counter + counters[psSig->ui32Register]++; + } } void Shader::DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand) { - uint32_t i; - uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase); - unsigned char *redirectTable = NULL; - unsigned char redir = 0; - unsigned char firstFound = 0; - uint32_t mask; - - for (i = 0; i < MAX_SUB_OPERANDS; i++) - if (psOperand->m_SubOperands[i].get()) - DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[i].get()); - - - switch (psOperand->eType) - { - case OPERAND_TYPE_INPUT: - case OPERAND_TYPE_INPUT_CONTROL_POINT: - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - redirectTable = regSpace == 0 ? &psPhase->acInputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; - break; - - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_OUTPUT_CONTROL_POINT: - redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; - break; - - default: - // Not a input or output, nothing to do here - return; - } - - redir = redirectTable[psOperand->ui32RegisterNumber]; - - if (redir == 0xff) // Already found overlap? - return; - - mask = psOperand->GetAccessMask(); - i = 0; - // Find the first mask bit set. 
- while ((mask & (1 << i)) == 0) - i++; - - firstFound = (redir >> (i * 2)) & 3; - for (; i < 4; i++) - { - unsigned char sig; - if ((mask & (1 << i)) == 0) - continue; - - sig = (redir >> (i * 2)) & 3; - // All set bits must access the same signature - if (sig != firstFound) - { - redirectTable[psOperand->ui32RegisterNumber] = 0xff; - return; - } - } - + uint32_t i; + uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase); + unsigned char *redirectTable = NULL; + unsigned char redir = 0; + unsigned char firstFound = 0; + uint32_t mask; + + for (i = 0; i < MAX_SUB_OPERANDS; i++) + if (psOperand->m_SubOperands[i].get()) + DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[i].get()); + + + switch (psOperand->eType) + { + case OPERAND_TYPE_INPUT: + case OPERAND_TYPE_INPUT_CONTROL_POINT: + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + redirectTable = regSpace == 0 ? &psPhase->acInputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; + break; + + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_OUTPUT_CONTROL_POINT: + redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; + break; + + default: + // Not a input or output, nothing to do here + return; + } + + redir = redirectTable[psOperand->ui32RegisterNumber]; + + if (redir == 0xff) // Already found overlap? + return; + + mask = psOperand->GetAccessMask(); + i = 0; + // Find the first mask bit set. 
+ while ((mask & (1 << i)) == 0) + i++; + + firstFound = (redir >> (i * 2)) & 3; + for (; i < 4; i++) + { + unsigned char sig; + if ((mask & (1 << i)) == 0) + continue; + + sig = (redir >> (i * 2)) & 3; + // All set bits must access the same signature + if (sig != firstFound) + { + redirectTable[psOperand->ui32RegisterNumber] = 0xff; + return; + } + } } static void PruneRedirectEntry(unsigned char &itr) { - if (itr != 0xff) - itr = 0; + if (itr != 0xff) + itr = 0; } // Check if inputs and outputs are accessed across semantic boundaries // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. void Shader::AnalyzeIOOverlap() { - uint32_t i, k; - std::vector outData; - DoSignatureAnalysis(sInfo.psInputSignatures, outData); - - // Now data has the values, copy them to all phases - for (i = 0; i < asPhases.size(); i++) - asPhases[i].acInputNeedsRedirect = outData; - - DoSignatureAnalysis(sInfo.psOutputSignatures, outData); - for (i = 0; i < asPhases.size(); i++) - asPhases[i].acOutputNeedsRedirect = outData; - - DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData); - for (i = 0; i < asPhases.size(); i++) - asPhases[i].acPatchConstantsNeedsRedirect = outData; - - // Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise) - // if we're crossing signature borders - for (i = 0; i < asPhases.size(); i++) - { - ShaderPhase *psPhase = &asPhases[i]; - for (k = 0; k < psPhase->psInst.size(); k++) - { - Instruction *psInst = &psPhase->psInst[k]; - uint32_t j; - for (j = 0; j < psInst->ui32NumOperands; j++) - DoIOOverlapOperand(psPhase, &psInst->asOperands[j]); - } - - // Now prune all tables from anything except 0xff. 
- std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry); - std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry); - std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry); - } + uint32_t i, k; + std::vector outData; + DoSignatureAnalysis(sInfo.psInputSignatures, outData); + + // Now data has the values, copy them to all phases + for (i = 0; i < asPhases.size(); i++) + asPhases[i].acInputNeedsRedirect = outData; + + DoSignatureAnalysis(sInfo.psOutputSignatures, outData); + for (i = 0; i < asPhases.size(); i++) + asPhases[i].acOutputNeedsRedirect = outData; + + DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData); + for (i = 0; i < asPhases.size(); i++) + asPhases[i].acPatchConstantsNeedsRedirect = outData; + + // Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise) + // if we're crossing signature borders + for (i = 0; i < asPhases.size(); i++) + { + ShaderPhase *psPhase = &asPhases[i]; + for (k = 0; k < psPhase->psInst.size(); k++) + { + Instruction *psInst = &psPhase->psInst[k]; + uint32_t j; + for (j = 0; j < psInst->ui32NumOperands; j++) + DoIOOverlapOperand(psPhase, &psInst->asOperands[j]); + } + + // Now prune all tables from anything except 0xff. + std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry); + std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry); + std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry); + } } - // In DX bytecode, all const arrays are vec4's, and all arrays are stuffed to one large array. 
// Luckily, each chunk is always accessed with suboperand plus (in ui32RegisterNumber) // So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read. void ShaderPhase::PruneConstArrays() { - using namespace std; - auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; }); - // Not found? We're done. - if (customDataItr == psDecl.end()) - return; - - // Store the original declaration - m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr); - - // Loop through each operand and pick up usage masks - HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - using namespace std; - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) - { - uint32_t accessMask = psOperand->GetAccessMask(); - uint32_t offset = psOperand->ui32RegisterNumber; - - // Update the chunk access mask - - // Find all existing entries that have anything common with the access mask - auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset); - vector matchingEntries; - for (auto itr = cbrange.first; itr != cbrange.second; itr++) - { - if ((itr->second.m_AccessMask & accessMask) != 0) - { - matchingEntries.push_back(itr); - } - }; - - if (matchingEntries.empty()) - { - // Not found, create new entry - m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand))); - } - else if(matchingEntries.size() == 1) - { - // Update access mask of the one existing entry - matchingEntries[0]->second.m_AccessMask |= accessMask; - matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand); - } - else - { - // Multiple entries with (now) overlapping mask. Merge to the first one. 
- ChunkMap::iterator tgt = matchingEntries[0]; - tgt->second.m_AccessMask |= accessMask; - tgt->second.m_UseSites.push_back((Operand *)psOperand); - ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks; - for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr) - { - tgt->second.m_AccessMask |= itr->second.m_AccessMask; - chunks.erase(itr); - }); - } - } - }); - - // Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found) - - uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size(); - for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++) - { - // Find the next chunk that shares any bits in the access mask - auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr) - { - return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0; - }); - - // Not found? Must continue until the end of array - if (nextItr == m_ConstantArrayInfo.m_Chunks.end()) - chunk->second.m_Size = totalSize - chunk->first; - else - { - // Otherwise we know the chunk size directly. - chunk->second.m_Size = nextItr->first - chunk->first; - } - - // Do rebase on the operands if necessary - chunk->second.m_Rebase = 0; - uint32_t t = chunk->second.m_AccessMask; - ASSERT(t != 0); - while ((t & 1) == 0) - { - chunk->second.m_Rebase++; - t >>= 1; - } - uint32_t rebase = chunk->second.m_Rebase; - uint32_t componentCount = 0; - while (t != 0) - { - componentCount++; - t >>= 1; - } - chunk->second.m_ComponentCount = componentCount; - - for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op) - { - // Store the rebase value to each operand and do the actual rebase. 
- op->m_Rebase = rebase; - op->m_Size = componentCount; - - if (rebase != 0) - { - // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. - switch (op->eSelMode) - { - case OPERAND_4_COMPONENT_MASK_MODE: - { - uint32_t oldMask = op->ui32CompMask; - if (oldMask == 0) - oldMask = OPERAND_4_COMPONENT_MASK_ALL; - - // Check that we're not losing any information - ASSERT((oldMask >> rebase) << rebase == oldMask); - op->ui32CompMask = (oldMask >> rebase); - break; - } - case OPERAND_4_COMPONENT_SELECT_1_MODE: - ASSERT(op->aui32Swizzle[0] >= rebase); - op->aui32Swizzle[0] -= rebase; - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - { - for (int i = 0; i < 4; i++) - { - // Note that this rebase is different from the one done for source operands - ASSERT(op->aui32Swizzle[i] >= rebase); - op->aui32Swizzle[i] -= rebase; - } - break; - } - default: - ASSERT(0); - - } - } - }); - } - - - // We'll do the actual declaration and pruning later on, now that we have the info stored up. - + using namespace std; + auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; }); + // Not found? We're done. 
+ if (customDataItr == psDecl.end()) + return; + + // Store the original declaration + m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr); + + // Loop through each operand and pick up usage masks + HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + using namespace std; + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) + { + uint32_t accessMask = psOperand->GetAccessMask(); + uint32_t offset = psOperand->ui32RegisterNumber; + + // Update the chunk access mask + + // Find all existing entries that have anything common with the access mask + auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset); + vector matchingEntries; + for (auto itr = cbrange.first; itr != cbrange.second; itr++) + { + if ((itr->second.m_AccessMask & accessMask) != 0) + { + matchingEntries.push_back(itr); + } + } + + if (matchingEntries.empty()) + { + // Not found, create new entry + m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand))); + } + else if (matchingEntries.size() == 1) + { + // Update access mask of the one existing entry + matchingEntries[0]->second.m_AccessMask |= accessMask; + matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand); + } + else + { + // Multiple entries with (now) overlapping mask. Merge to the first one. 
+ ChunkMap::iterator tgt = matchingEntries[0]; + tgt->second.m_AccessMask |= accessMask; + tgt->second.m_UseSites.push_back((Operand *)psOperand); + ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks; + for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr) + { + tgt->second.m_AccessMask |= itr->second.m_AccessMask; + chunks.erase(itr); + }); + } + } + }); + + // Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found) + + uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size(); + for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++) + { + // Find the next chunk that shares any bits in the access mask + auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr) + { + return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0; + }); + + // Not found? Must continue until the end of array + if (nextItr == m_ConstantArrayInfo.m_Chunks.end()) + chunk->second.m_Size = totalSize - chunk->first; + else + { + // Otherwise we know the chunk size directly. + chunk->second.m_Size = nextItr->first - chunk->first; + } + + // Do rebase on the operands if necessary + chunk->second.m_Rebase = 0; + uint32_t t = chunk->second.m_AccessMask; + ASSERT(t != 0); + while ((t & 1) == 0) + { + chunk->second.m_Rebase++; + t >>= 1; + } + uint32_t rebase = chunk->second.m_Rebase; + uint32_t componentCount = 0; + while (t != 0) + { + componentCount++; + t >>= 1; + } + chunk->second.m_ComponentCount = componentCount; + + for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op) + { + // Store the rebase value to each operand and do the actual rebase. 
+ op->m_Rebase = rebase; + op->m_Size = componentCount; + + if (rebase != 0) + { + // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. + switch (op->eSelMode) + { + case OPERAND_4_COMPONENT_MASK_MODE: + { + uint32_t oldMask = op->ui32CompMask; + if (oldMask == 0) + oldMask = OPERAND_4_COMPONENT_MASK_ALL; + + // Check that we're not losing any information + ASSERT((oldMask >> rebase) << rebase == oldMask); + op->ui32CompMask = (oldMask >> rebase); + break; + } + case OPERAND_4_COMPONENT_SELECT_1_MODE: + ASSERT(op->aui32Swizzle[0] >= rebase); + op->aui32Swizzle[0] -= rebase; + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + { + for (int i = 0; i < 4; i++) + { + // Note that this rebase is different from the one done for source operands + ASSERT(op->aui32Swizzle[i] >= rebase); + op->aui32Swizzle[i] -= rebase; + } + break; + } + default: + ASSERT(0); + } + } + }); + } + + + // We'll do the actual declaration and pruning later on, now that we have the info stored up. } HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG() { - if (!m_CFGInitialized) - { - m_CFG.Build(&psInst[0]); - m_CFGInitialized = true; - } + if (!m_CFGInitialized) + { + m_CFG.Build(&psInst[0]); + m_CFGInitialized = true; + } - return m_CFG; + return m_CFG; } void ShaderPhase::UnvectorizeImmMoves() { - // NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore - // (we insert new instructions there) - using namespace std; - vector nInst; - // Reserve 1.5x space - nInst.reserve(psInst.size() * 3 / 2); - - for_each(psInst.begin(), psInst.end(), [&](Instruction &i) - { - if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1) - { - nInst.push_back(i); - return; - } - // Ok, found one to unvectorize. 
- ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - uint32_t mask = i.asOperands[0].ui32CompMask; - for (uint32_t j = 0; j < 4; j++) - { - if ((mask & (1 << j)) == 0) - continue; - - Instruction ni = i; - ni.asOperands[0].ui32CompMask = (1 << j); - nInst.push_back(ni); - } - }); - psInst.clear(); - psInst.swap(nInst); + // NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore + // (we insert new instructions there) + using namespace std; + vector nInst; + // Reserve 1.5x space + nInst.reserve(psInst.size() * 3 / 2); + + for_each(psInst.begin(), psInst.end(), [&](Instruction &i) + { + if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1) + { + nInst.push_back(i); + return; + } + // Ok, found one to unvectorize. + ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + uint32_t mask = i.asOperands[0].ui32CompMask; + for (uint32_t j = 0; j < 4; j++) + { + if ((mask & (1 << j)) == 0) + continue; + + Instruction ni = i; + ni.asOperands[0].ui32CompMask = (1 << j); + nInst.push_back(ni); + } + }); + psInst.clear(); + psInst.swap(nInst); } void ShaderPhase::ExpandSWAPCs() { - // First find the DCL_TEMPS declaration - auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; }); - if (dcitr == psDecl.end()) - { - // No temp declaration? Probably we won't have SWAPC either, then. 
- return; - } - Declaration &tmpDecl = *dcitr; - - uint32_t extraTemp = 0; - bool extraTempAllocated = false; - - // Parse through instructions, open up SWAPCs if necessary - while (1) - { - // Need to find from top every time, because we're inserting stuff into the vector - auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; }); - if (swapItr == psInst.end()) - break; - - // Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN): - -/* swapc dest0[.mask], - dest1[.mask], - src0[.swizzle], - src1[.swizzle], - src2[.swizzle] - - expands to : - - movc temp[dest0s mask], - src0[.swizzle], - src2[.swizzle], src1[.swizzle] - - movc dest1[.mask], - src0[.swizzle], - src1[.swizzle], src2[.swizzle] - - mov dest0.mask, temp + // First find the DCL_TEMPS declaration + auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; }); + if (dcitr == psDecl.end()) + { + // No temp declaration? Probably we won't have SWAPC either, then. 
+ return; + } + Declaration &tmpDecl = *dcitr; + + uint32_t extraTemp = 0; + bool extraTempAllocated = false; + + // Parse through instructions, open up SWAPCs if necessary + while (1) + { + // Need to find from top every time, because we're inserting stuff into the vector + auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; }); + if (swapItr == psInst.end()) + break; + + // Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN): + +/* swapc dest0[.mask], + dest1[.mask], + src0[.swizzle], + src1[.swizzle], + src2[.swizzle] + + expands to : + + movc temp[dest0s mask], + src0[.swizzle], + src2[.swizzle], src1[.swizzle] + + movc dest1[.mask], + src0[.swizzle], + src1[.swizzle], src2[.swizzle] + + mov dest0.mask, temp */ - // Allocate a new temp, if not already done - if (!extraTempAllocated) - { - extraTemp = tmpDecl.value.ui32NumTemps++; - extraTempAllocated = true; - } - - Instruction origSwapInst; + // Allocate a new temp, if not already done + if (!extraTempAllocated) + { + extraTemp = tmpDecl.value.ui32NumTemps++; + extraTempAllocated = true; + } + + Instruction origSwapInst; #if _DEBUG - origSwapInst.id = swapItr->id; + origSwapInst.id = swapItr->id; #endif - std::swap(*swapItr, origSwapInst); // Store the original swapc for reading - - // OP 1: MOVC temp[dest0 mask], src0, src2, stc1 - swapItr->eOpcode = OPCODE_MOVC; - swapItr->ui32NumOperands = 4; - swapItr->ui32FirstSrc = 1; - swapItr->asOperands[0] = origSwapInst.asOperands[0]; - swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP; - swapItr->asOperands[0].ui32RegisterNumber = extraTemp; - // mask is already fine - swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0 - swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2 - swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1 - // swapItr is already in the psInst vector. 
- - Instruction newInst[2] = { Instruction(), Instruction() }; - // OP 2: MOVC dest1, src0, src1, src2 - newInst[0].eOpcode = OPCODE_MOVC; - newInst[0].ui32NumOperands = 4; - newInst[0].ui32FirstSrc = 1; - newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1 - newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0 - newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1 - newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2 + std::swap(*swapItr, origSwapInst); // Store the original swapc for reading + + // OP 1: MOVC temp[dest0 mask], src0, src2, stc1 + swapItr->eOpcode = OPCODE_MOVC; + swapItr->ui32NumOperands = 4; + swapItr->ui32FirstSrc = 1; + swapItr->asOperands[0] = origSwapInst.asOperands[0]; + swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP; + swapItr->asOperands[0].ui32RegisterNumber = extraTemp; + // mask is already fine + swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0 + swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2 + swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1 + // swapItr is already in the psInst vector. 
+ + Instruction newInst[2] = { Instruction(), Instruction() }; + // OP 2: MOVC dest1, src0, src1, src2 + newInst[0].eOpcode = OPCODE_MOVC; + newInst[0].ui32NumOperands = 4; + newInst[0].ui32FirstSrc = 1; + newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1 + newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0 + newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1 + newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2 #if _DEBUG - newInst[0].id = swapItr->id; + newInst[0].id = swapItr->id; #endif - // OP 3: mov dest0.mask, temp - newInst[1].eOpcode = OPCODE_MOV; - newInst[1].ui32NumOperands = 2; - newInst[1].ui32FirstSrc = 1; - newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0 - // First copy dest0 to src as well to get the mask set up correctly - newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0; - // Then overwrite with temp reg - newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP; - newInst[1].asOperands[1].ui32RegisterNumber = extraTemp; + // OP 3: mov dest0.mask, temp + newInst[1].eOpcode = OPCODE_MOV; + newInst[1].ui32NumOperands = 2; + newInst[1].ui32FirstSrc = 1; + newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0 + // First copy dest0 to src as well to get the mask set up correctly + newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0; + // Then overwrite with temp reg + newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP; + newInst[1].asOperands[1].ui32RegisterNumber = extraTemp; #if _DEBUG - newInst[1].id = swapItr->id; + newInst[1].id = swapItr->id; #endif - // Insert the new instructions to the vector - psInst.insert(swapItr + 1, newInst, newInst + 2); - } + // Insert the new instructions to the vector + psInst.insert(swapItr + 1, newInst, newInst + 2); + } } void Shader::ExpandSWAPCs() { - // Just call ExpandSWAPCs for each phase - for (int i = 0; i < asPhases.size(); i++) - { - asPhases[i].ExpandSWAPCs(); - } + // Just call ExpandSWAPCs for each phase 
+ for (int i = 0; i < asPhases.size(); i++) + { + asPhases[i].ExpandSWAPCs(); + } } void Shader::ForcePositionToHighp() { - // Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?) - if (eShaderType != VERTEX_SHADER) - return; - - ShaderPhase &phase = asPhases[0]; - - // Find the output declaration - std::vector::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool - { - if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV) - { - const SPECIAL_NAME specialName = decl.asOperands[0].eSpecialName; - if (specialName == NAME_POSITION || - specialName == NAME_UNDEFINED) // This might be SV_Position (because d3dcompiler is weird). - { - const ShaderInfo::InOutSignature *sig = NULL; - sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); - ASSERT(sig != NULL); - if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) - { - ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; - return true; - } - } - return false; - } - else if (decl.eOpcode == OPCODE_DCL_OUTPUT) - { - const ShaderInfo::InOutSignature *sig = NULL; - sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); - ASSERT(sig != NULL); - if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) - { - ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; - return true; - } - return false; - } + // Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?) 
+ if (eShaderType != VERTEX_SHADER) + return; + + ShaderPhase &phase = asPhases[0]; + + // Find the output declaration + std::vector::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool + { + if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV) + { + const SPECIAL_NAME specialName = decl.asOperands[0].eSpecialName; + if (specialName == NAME_POSITION || + specialName == NAME_UNDEFINED) // This might be SV_Position (because d3dcompiler is weird). + { + const ShaderInfo::InOutSignature *sig = NULL; + sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) + { + ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; + return true; + } + } + return false; + } + else if (decl.eOpcode == OPCODE_DCL_OUTPUT) + { + const ShaderInfo::InOutSignature *sig = NULL; + sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) + { + ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; + return true; + } + return false; + } return false; + }); - }); - - // Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc. - if (itr == phase.psDecl.end()) - return; + // Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc. 
+ if (itr == phase.psDecl.end()) + return; - uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber; + uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber; HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_DEST_OPERAND, [outputPosReg](std::vector::iterator itr, Operand *op, uint32_t flags) - { - if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg) - op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; - }); - + { + if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg) + op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; + }); } void Shader::FindUnusedGlobals(uint32_t flags) { - for(int i = 0; i < asPhases.size(); i++) - { - ShaderPhase &phase = asPhases[i]; - - // Loop through every operand and pick up usages - HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND|FEO_FLAG_SUBOPERAND, [&](std::vector::iterator inst, Operand *op, uint32_t flags) - { - // Not a constant buffer read? continue - if(op->eType != OPERAND_TYPE_CONSTANT_BUFFER) - return; - - const uint32_t ui32BindingPoint = op->aui32ArraySizes[0]; - const ConstantBuffer *psCBuf = NULL; - sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); - - if(!psCBuf) - return; - - // Get all the struct members that can be reached from this usage: - uint32_t mask = op->GetAccessMask(); - for(uint32_t k = 0; k < 4; k++) - { - if((mask & (1 << k)) == 0) - continue; - - uint32_t tmpSwizzle[4] = {k, k, k, k}; - int rebase; - bool isArray; - - ShaderVarType *psVarType = NULL; - - ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags); - - // Mark as used. Also all parents. 
- while(psVarType) - { - psVarType->m_IsUsed = true; - psVarType = psVarType->Parent; - } - } - }); - - } + for (int i = 0; i < asPhases.size(); i++) + { + ShaderPhase &phase = asPhases[i]; + + // Loop through every operand and pick up usages + HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, [&](std::vector::iterator inst, Operand *op, uint32_t flags) + { + // Not a constant buffer read? continue + if (op->eType != OPERAND_TYPE_CONSTANT_BUFFER) + return; + + const uint32_t ui32BindingPoint = op->aui32ArraySizes[0]; + const ConstantBuffer *psCBuf = NULL; + sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); + + if (!psCBuf) + return; + + // Get all the struct members that can be reached from this usage: + uint32_t mask = op->GetAccessMask(); + for (uint32_t k = 0; k < 4; k++) + { + if ((mask & (1 << k)) == 0) + continue; + + uint32_t tmpSwizzle[4] = {k, k, k, k}; + int rebase; + bool isArray; + + ShaderVarType *psVarType = NULL; + + ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags); + + // Mark as used. Also all parents. 
+ while (psVarType) + { + psVarType->m_IsUsed = true; + psVarType = psVarType->Parent; + } + } + }); + } } - diff --git a/src/ShaderInfo.cpp b/src/ShaderInfo.cpp index e862f8d..a494125 100644 --- a/src/ShaderInfo.cpp +++ b/src/ShaderInfo.cpp @@ -1,4 +1,3 @@ - #include "ShaderInfo.h" #include "internal_includes/debug.h" #include "internal_includes/tokens.h" @@ -8,170 +7,170 @@ SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo) { - const ResourceBinding* psBinding = 0; - int found; - found = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psBinding); - ASSERT(found != 0); - return psBinding->GetDataType(); + const ResourceBinding* psBinding = 0; + int found; + found = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psBinding); + ASSERT(found != 0); + return psBinding->GetDataType(); } void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const { - ASSERT(ui32MajorVersion > 3); - *ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]]; + ASSERT(ui32MajorVersion > 3); + *ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]]; } int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const { - size_t i; - const size_t ui32NumBindings = psResourceBindings.size(); - const ResourceBinding* psBindings = &psResourceBindings[0]; - - for (i = 0; i < ui32NumBindings; ++i) - { - if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup) - { - if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount)) - { - *ppsOutBinding = psBindings + i; - return 1; - } - } - } - return 0; + size_t i; + const size_t ui32NumBindings = psResourceBindings.size(); + const ResourceBinding* psBindings = &psResourceBindings[0]; + + for (i = 0; i < ui32NumBindings; ++i) + { + if 
(ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup) + { + if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount)) + { + *ppsOutBinding = psBindings + i; + return 1; + } + } + } + return 0; } int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const { - size_t i; - const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset && - ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + psThisPointerConstBuffer->asVars[i].ui32Size)) - { - *ppsShaderVar = &psThisPointerConstBuffer->asVars[i]; - return 1; - } - } - return 0; + size_t i; + const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset && + ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + psThisPointerConstBuffer->asVars[i].ui32Size)) + { + *ppsShaderVar = &psThisPointerConstBuffer->asVars[i]; + return 1; + } + } + return 0; } int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const { - size_t i; - const size_t ui32NumVars = psInputSignatures.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0)) - { - *ppsOut = &psInputSignatures[i]; - return 1; - } - } - ASSERT(allowNull); - return 0; + size_t i; + const size_t ui32NumVars = psInputSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0)) + { + *ppsOut = &psInputSignatures[i]; + return 1; + } + } + ASSERT(allowNull); + return 0; } int 
ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const { - size_t i; - const size_t ui32NumVars = psPatchConstantSignatures.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0)) - { - *ppsOut = &psPatchConstantSignatures[i]; - return 1; - } - } - - // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks. - // In those situations just take the last signature that uses that register (it's typically the "highest" one) - for( i = ui32NumVars - 1; i-- > 0; ) - { - if (ui32Register == psPatchConstantSignatures[i].ui32Register) - { - *ppsOut = &psPatchConstantSignatures[i]; - return 1; - } - } - - ASSERT(allowNull); - return 0; + size_t i; + const size_t ui32NumVars = psPatchConstantSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0)) + { + *ppsOut = &psPatchConstantSignatures[i]; + return 1; + } + } + + // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks. 
+ // In those situations just take the last signature that uses that register (it's typically the "highest" one) + for (i = ui32NumVars - 1; i-- > 0;) + { + if (ui32Register == psPatchConstantSignatures[i].ui32Register) + { + *ppsOut = &psPatchConstantSignatures[i]; + return 1; + } + } + + ASSERT(allowNull); + return 0; } int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register, - const uint32_t ui32CompMask, - const uint32_t ui32Stream, - const InOutSignature** ppsOut, - bool allowNull /* = false */) const + const uint32_t ui32CompMask, + const uint32_t ui32Stream, + const InOutSignature** ppsOut, + bool allowNull /* = false */) const { - size_t i; - const size_t ui32NumVars = psOutputSignatures.size(); - ASSERT(ui32CompMask != 0); - - for (i = 0; i < ui32NumVars; ++i) - { - if (ui32Register == psOutputSignatures[i].ui32Register && - (ui32CompMask & psOutputSignatures[i].ui32Mask) && - ui32Stream == psOutputSignatures[i].ui32Stream) - { - *ppsOut = &psOutputSignatures[i]; - return 1; - } - } - ASSERT(allowNull); - return 0; + size_t i; + const size_t ui32NumVars = psOutputSignatures.size(); + ASSERT(ui32CompMask != 0); + + for (i = 0; i < ui32NumVars; ++i) + { + if (ui32Register == psOutputSignatures[i].ui32Register && + (ui32CompMask & psOutputSignatures[i].ui32Mask) && + ui32Stream == psOutputSignatures[i].ui32Stream) + { + *ppsOut = &psOutputSignatures[i]; + return 1; + } + } + ASSERT(allowNull); + return 0; } int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const { - size_t i; - const size_t ui32NumVars = psOutputSignatures.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if (eSystemValueType == psOutputSignatures[i].eSystemValueType && - ui32SemanticIndex == psOutputSignatures[i].ui32SemanticIndex) - { - *ppsOut = &psOutputSignatures[i]; - return 1; - } - } - ASSERT(0); - return 0; + size_t i; + const size_t ui32NumVars = psOutputSignatures.size(); 
+ + for (i = 0; i < ui32NumVars; ++i) + { + if (eSystemValueType == psOutputSignatures[i].eSystemValueType && + ui32SemanticIndex == psOutputSignatures[i].ui32SemanticIndex) + { + *ppsOut = &psOutputSignatures[i]; + return 1; + } + } + ASSERT(0); + return 0; } uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize) { - // Default is regular matrices, vectors and scalars + // Default is regular matrices, vectors and scalars uint32_t size = psType->Columns * psType->Rows * 4; - // Struct size is calculated from the offset and size of its last member. + // Struct size is calculated from the offset and size of its last member. // Need to take into account that members could be arrays. - if (psType->Class == SVC_STRUCT) - { + if (psType->Class == SVC_STRUCT) + { size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true); - } - // Matrices represented as vec4 arrays have special size calculation - else if (matrixAsVectors) - { - if (psType->Class == SVC_MATRIX_ROWS) - { + } + // Matrices represented as vec4 arrays have special size calculation + else if (matrixAsVectors) + { + if (psType->Class == SVC_MATRIX_ROWS) + { size = psType->Rows * 16; - } - else if (psType->Class == SVC_MATRIX_COLUMNS) - { - size = psType->Columns * 16; - } - } + } + else if (psType->Class == SVC_MATRIX_COLUMNS) + { + size = psType->Columns * 16; + } + } if (wholeArraySize && psType->Elements > 1) - { + { uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size size = (psType->Elements - 1) * paddedSize + size; // Except the last element } @@ -180,146 +179,146 @@ uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVect } static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, - uint32_t parentOffset, - uint32_t offsetToFind, - bool* isArray, - std::vector* arrayIndices, - int32_t* pi32Rebase, - uint32_t flags) + uint32_t parentOffset, + 
uint32_t offsetToFind, + bool* isArray, + std::vector* arrayIndices, + int32_t* pi32Rebase, + uint32_t flags) { - uint32_t thisOffset = parentOffset + psType->Offset; - uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); - uint32_t paddedSize = ((thisSize + 15) / 16) * 16; - uint32_t arraySize = thisSize; - - // Array elements are padded to align on vec4 size, except for the last one - if (psType->Elements) - arraySize = (paddedSize * (psType->Elements - 1)) + thisSize; - - if ((offsetToFind >= thisOffset) && - offsetToFind < (thisOffset + arraySize)) - { - *isArray = false; - if (psType->Class == SVC_STRUCT) - { - if (psType->Elements > 1 && arrayIndices != NULL) - arrayIndices->push_back((offsetToFind - thisOffset) / thisSize); - - // Need to bring offset back to element zero in case of array of structs - uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize; - uint32_t m = 0; - - for (m = 0; m < psType->MemberCount; ++m) - { - const ShaderVarType* psMember = &psType->Members[m]; - - const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags); - if (foundType != NULL) - return foundType; - } - } - // Check for array of scalars or vectors (both take up 16 bytes per element). - // Matrices are also treated as arrays of vectors. - else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) || - ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1)) - { - *isArray = true; - if (arrayIndices != NULL) - arrayIndices->push_back((offsetToFind - thisOffset) / 16); - } - else if (psType->Class == SVC_VECTOR) - { - //Check for vector starting at a non-vec4 offset. 
- - // cbuffer $Globals - // { - // - // float angle; // Offset: 0 Size: 4 - // float2 angle2; // Offset: 4 Size: 8 - // - // } - - //cb0[0].x = angle - //cb0[0].yzyy = angle2.xyxx - - //Rebase angle2 so that .y maps to .x, .z maps to .y - - pi32Rebase[0] = thisOffset % 16; - } - - return psType; - } - return NULL; + uint32_t thisOffset = parentOffset + psType->Offset; + uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); + uint32_t paddedSize = ((thisSize + 15) / 16) * 16; + uint32_t arraySize = thisSize; + + // Array elements are padded to align on vec4 size, except for the last one + if (psType->Elements) + arraySize = (paddedSize * (psType->Elements - 1)) + thisSize; + + if ((offsetToFind >= thisOffset) && + offsetToFind < (thisOffset + arraySize)) + { + *isArray = false; + if (psType->Class == SVC_STRUCT) + { + if (psType->Elements > 1 && arrayIndices != NULL) + arrayIndices->push_back((offsetToFind - thisOffset) / thisSize); + + // Need to bring offset back to element zero in case of array of structs + uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize; + uint32_t m = 0; + + for (m = 0; m < psType->MemberCount; ++m) + { + const ShaderVarType* psMember = &psType->Members[m]; + + const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags); + if (foundType != NULL) + return foundType; + } + } + // Check for array of scalars or vectors (both take up 16 bytes per element). + // Matrices are also treated as arrays of vectors. + else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) || + ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1)) + { + *isArray = true; + if (arrayIndices != NULL) + arrayIndices->push_back((offsetToFind - thisOffset) / 16); + } + else if (psType->Class == SVC_VECTOR) + { + //Check for vector starting at a non-vec4 offset. 
+ + // cbuffer $Globals + // { + // + // float angle; // Offset: 0 Size: 4 + // float2 angle2; // Offset: 4 Size: 8 + // + // } + + //cb0[0].x = angle + //cb0[0].yzyy = angle2.xyxx + + //Rebase angle2 so that .y maps to .x, .z maps to .y + + pi32Rebase[0] = thisOffset % 16; + } + + return psType; + } + return NULL; } int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, - const uint32_t(&pui32Swizzle)[4], - const ConstantBuffer* psCBuf, - const ShaderVarType** ppsShaderVar, // Output the found var - bool* isArray, // Output bool that tells if the found var is an array - std::vector* arrayIndices, // Output vector of array indices in order from root parent to the found var - int32_t* pi32Rebase, // Output swizzle rebase - uint32_t flags) + const uint32_t(&pui32Swizzle)[4], + const ConstantBuffer* psCBuf, + const ShaderVarType** ppsShaderVar, // Output the found var + bool* isArray, // Output bool that tells if the found var is an array + std::vector* arrayIndices, // Output vector of array indices in order from root parent to the found var + int32_t* pi32Rebase, // Output swizzle rebase + uint32_t flags) { - size_t i; - - uint32_t ui32ByteOffset = ui32Vec4Offset * 16; - - //Swizzle can point to another variable. In the example below - //cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined - //into vectors. psCBuf->ui32NumVars will be 3. 
- - // cbuffer cbUIUpdates - // { - // float g_fLifeSpan; // Offset: 0 Size: 4 - // float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused] - // float g_fRadiusMin; // Offset: 8 Size: 4 [unused] - // float g_fRadiusMax; // Offset: 12 Size: 4 [unused] - // float g_fGrowTime; // Offset: 16 Size: 4 [unused] - // float g_fStepSize; // Offset: 20 Size: 4 - // float g_fTurnRate; // Offset: 24 Size: 4 - // float g_fTurnSpeed; // Offset: 28 Size: 4 [unused] - // float g_fLeafRate; // Offset: 32 Size: 4 - // float g_fShrinkTime; // Offset: 36 Size: 4 [unused] - // uint g_uMaxFaces; // Offset: 40 Size: 4 - // } - if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y) - { - ui32ByteOffset += 4; - } - else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z) - { - ui32ByteOffset += 8; - } - else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W) - { - ui32ByteOffset += 12; - } - - const size_t ui32NumVars = psCBuf->asVars.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags); - - if (ppsShaderVar[0] != NULL) - return 1; - } - return 0; + size_t i; + + uint32_t ui32ByteOffset = ui32Vec4Offset * 16; + + //Swizzle can point to another variable. In the example below + //cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined + //into vectors. psCBuf->ui32NumVars will be 3. 
+ + // cbuffer cbUIUpdates + // { + // float g_fLifeSpan; // Offset: 0 Size: 4 + // float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused] + // float g_fRadiusMin; // Offset: 8 Size: 4 [unused] + // float g_fRadiusMax; // Offset: 12 Size: 4 [unused] + // float g_fGrowTime; // Offset: 16 Size: 4 [unused] + // float g_fStepSize; // Offset: 20 Size: 4 + // float g_fTurnRate; // Offset: 24 Size: 4 + // float g_fTurnSpeed; // Offset: 28 Size: 4 [unused] + // float g_fLeafRate; // Offset: 32 Size: 4 + // float g_fShrinkTime; // Offset: 36 Size: 4 [unused] + // uint g_uMaxFaces; // Offset: 40 Size: 4 + // } + if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ui32ByteOffset += 4; + } + else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ui32ByteOffset += 8; + } + else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ui32ByteOffset += 12; + } + + const size_t ui32NumVars = psCBuf->asVars.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags); + + if (ppsShaderVar[0] != NULL) + return 1; + } + return 0; } // Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. // Searches for brackets and inserts indices one by one. 
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors) { - std::ostringstream oss; - size_t prevpos = 0; - size_t pos = psShaderVar->fullName.find('[', 0); - uint32_t i = 0; - while (pos != std::string::npos) - { - pos++; - oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); + std::ostringstream oss; + size_t prevpos = 0; + size_t pos = psShaderVar->fullName.find('[', 0); + uint32_t i = 0; + while (pos != std::string::npos) + { + pos++; + oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); // Add possibly given dynamic index for the root array. if (i == 0 && !dynamicIndex.empty()) @@ -340,71 +339,71 @@ std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShade if (!indices.empty() && indices[i] != 0) oss << " + " << indices[i]; } - else if (i < indices.size()) - oss << indices[i]; + else if (i < indices.size()) + oss << indices[i]; - prevpos = pos; - i++; - pos = psShaderVar->fullName.find('[', prevpos); - } - oss << psShaderVar->fullName.substr(prevpos); + prevpos = pos; + i++; + pos = psShaderVar->fullName.find('[', prevpos); + } + oss << psShaderVar->fullName.substr(prevpos); - return oss.str(); + return oss.str(); } ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType) { - switch (eType) - { - case RTYPE_CBUFFER: - return RGROUP_CBUFFER; - - case RTYPE_SAMPLER: - return RGROUP_SAMPLER; - - case RTYPE_TEXTURE: - case RTYPE_BYTEADDRESS: - case RTYPE_STRUCTURED: - return RGROUP_TEXTURE; - - case RTYPE_UAV_RWTYPED: - case RTYPE_UAV_RWSTRUCTURED: - case RTYPE_UAV_RWBYTEADDRESS: - case RTYPE_UAV_APPEND_STRUCTURED: - case RTYPE_UAV_CONSUME_STRUCTURED: - case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: - return RGROUP_UAV; - - case RTYPE_TBUFFER: - ASSERT(0); // Need to find out which group this belongs to - return RGROUP_TEXTURE; - default: + switch (eType) + { + case 
RTYPE_CBUFFER: + return RGROUP_CBUFFER; + + case RTYPE_SAMPLER: + return RGROUP_SAMPLER; + + case RTYPE_TEXTURE: + case RTYPE_BYTEADDRESS: + case RTYPE_STRUCTURED: + return RGROUP_TEXTURE; + + case RTYPE_UAV_RWTYPED: + case RTYPE_UAV_RWSTRUCTURED: + case RTYPE_UAV_RWBYTEADDRESS: + case RTYPE_UAV_APPEND_STRUCTURED: + case RTYPE_UAV_CONSUME_STRUCTURED: + case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: + return RGROUP_UAV; + + case RTYPE_TBUFFER: + ASSERT(0); // Need to find out which group this belongs to + return RGROUP_TEXTURE; + default: break; - } + } - ASSERT(0); - return RGROUP_CBUFFER; + ASSERT(0); + return RGROUP_CBUFFER; } void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info) { - if (info.empty()) - return; + if (info.empty()) + return; - for (size_t i = 0; i < psResourceBindings.size(); i++) - { - ResourceBinding *rb = &psResourceBindings[i]; - if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE) - continue; + for (size_t i = 0; i < psResourceBindings.size(); i++) + { + ResourceBinding *rb = &psResourceBindings[i]; + if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE) + continue; - HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); // Try finding exact match + HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); // Try finding exact match - // If match not found, check if name has "sampler" prefix - // -> try finding a match without the prefix (DX11 style sampler case) - if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0) - j = info.find(rb->name.substr(7, rb->name.size() - 7)); + // If match not found, check if name has "sampler" prefix + // -> try finding a match without the prefix (DX11 style sampler case) + if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0) + j = info.find(rb->name.substr(7, rb->name.size() - 7)); - if (j != info.end()) - rb->ePrecision = j->second; - } + if (j != info.end()) + rb->ePrecision = j->second; + } } diff --git a/src/UseDefineChains.cpp 
b/src/UseDefineChains.cpp index 5d07332..496854f 100644 --- a/src/UseDefineChains.cpp +++ b/src/UseDefineChains.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/UseDefineChains.h" #include "internal_includes/debug.h" #include "internal_includes/Instruction.h" @@ -16,95 +15,94 @@ using HLSLcc::ForEachOperand; // Debug mode static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) { - DefineUseChain::iterator du = psDUChains[idx].begin(); - UseDefineChain::iterator ud = psUDChains[idx].begin(); - while (du != psDUChains[idx].end()) - { - ASSERT(du->index == idx % 4); - // Check that the definition actually writes to idx - { - uint32_t tempReg = idx / 4; - uint32_t offs = idx - (tempReg * 4); - uint32_t accessMask = 1 << offs; - uint32_t i; - int found = 0; - for (i = 0; i < du->psInst->ui32FirstSrc; i++) - { - if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP) - { - if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg) - { - uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]); - if (writeMask & accessMask) - { - ASSERT(writeMask == du->writeMask); - found = 1; - break; - } - } - } - } - ASSERT(found); - } - - // Check that each usage of each definition also is found in the use-define chain - UsageSet::iterator ul = du->usages.begin(); - while (ul != du->usages.end()) - { - // Search for the usage in the chain - UseDefineChain::iterator use = ud; - while (use != psUDChains[idx].end() && &*use != *ul) - use++; - ASSERT(use != psUDChains[idx].end()); - ASSERT(&*use == *ul); - - // Check that the mapping back is also found - ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end()); - - ul++; - } - - du++; - } + DefineUseChain::iterator du = psDUChains[idx].begin(); + UseDefineChain::iterator ud = psUDChains[idx].begin(); + while (du != psDUChains[idx].end()) + { + ASSERT(du->index == idx % 4); + // Check that the 
definition actually writes to idx + { + uint32_t tempReg = idx / 4; + uint32_t offs = idx - (tempReg * 4); + uint32_t accessMask = 1 << offs; + uint32_t i; + int found = 0; + for (i = 0; i < du->psInst->ui32FirstSrc; i++) + { + if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP) + { + if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg) + { + uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]); + if (writeMask & accessMask) + { + ASSERT(writeMask == du->writeMask); + found = 1; + break; + } + } + } + } + ASSERT(found); + } + + // Check that each usage of each definition also is found in the use-define chain + UsageSet::iterator ul = du->usages.begin(); + while (ul != du->usages.end()) + { + // Search for the usage in the chain + UseDefineChain::iterator use = ud; + while (use != psUDChains[idx].end() && &*use != *ul) + use++; + ASSERT(use != psUDChains[idx].end()); + ASSERT(&*use == *ul); + + // Check that the mapping back is also found + ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end()); + + ul++; + } + + du++; + } } static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) { - DefineUseChain::iterator du = psDUChains[idx].begin(); - UseDefineChain::iterator ud = psUDChains[idx].begin(); - while (ud != psUDChains[idx].end()) - { - // Check that each definition of each usage also is found in the define-use chain - DefineSet::iterator dl = ud->defines.begin(); - ASSERT(ud->psOp->ui32RegisterNumber == idx / 4); - ASSERT(ud->index == idx % 4); - while (dl != ud->defines.end()) - { - // Search for the definition in the chain - DefineUseChain::iterator def = du; - while (def != psDUChains[idx].end() && &*def != *dl) - def++; - ASSERT(def != psDUChains[idx].end()); - ASSERT(&*def == *dl); - - // Check that the mapping back is also found - ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != 
def->usages.end()); - - dl++; - } - ud++; - } - + DefineUseChain::iterator du = psDUChains[idx].begin(); + UseDefineChain::iterator ud = psUDChains[idx].begin(); + while (ud != psUDChains[idx].end()) + { + // Check that each definition of each usage also is found in the define-use chain + DefineSet::iterator dl = ud->defines.begin(); + ASSERT(ud->psOp->ui32RegisterNumber == idx / 4); + ASSERT(ud->index == idx % 4); + while (dl != ud->defines.end()) + { + // Search for the definition in the chain + DefineUseChain::iterator def = du; + while (def != psDUChains[idx].end() && &*def != *dl) + def++; + ASSERT(def != psDUChains[idx].end()); + ASSERT(&*def == *dl); + + // Check that the mapping back is also found + ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end()); + + dl++; + } + ud++; + } } static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) { - uint32_t i; - for (i = 0; i < tempRegs * 4; i++) - { - UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions); - UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions); - } + uint32_t i; + for (i = 0; i < tempRegs * 4; i++) + { + UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions); + UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions); + } } #define printf_console printf @@ -116,768 +114,759 @@ using std::for_each; static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index) { - // Try to find an existing entry - auto itr = std::find_if(psDUChain.begin(), psDUChain.end(), [&](const DefineUseChainEntry &de) - { - return de.psInst == def.m_Instruction && de.psOp == def.m_Operand; - }); - - if (itr != psDUChain.end()) - { - return &(*itr); - } - - // Not found, create - psDUChain.push_front(DefineUseChainEntry()); - DefineUseChainEntry &de = *psDUChain.begin(); - - 
de.psInst = (Instruction *)def.m_Instruction; - de.psOp = (Operand *)def.m_Operand; - de.index = index; - de.writeMask = def.m_Operand->GetAccessMask(); - de.psSiblings[index] = &de; - - return &de; + // Try to find an existing entry + auto itr = std::find_if(psDUChain.begin(), psDUChain.end(), [&](const DefineUseChainEntry &de) + { + return de.psInst == def.m_Instruction && de.psOp == def.m_Operand; + }); + + if (itr != psDUChain.end()) + { + return &(*itr); + } + + // Not found, create + psDUChain.push_front(DefineUseChainEntry()); + DefineUseChainEntry &de = *psDUChain.begin(); + + de.psInst = (Instruction *)def.m_Instruction; + de.psOp = (Operand *)def.m_Operand; + de.index = index; + de.writeMask = def.m_Operand->GetAccessMask(); + de.psSiblings[index] = &de; + + return &de; } - - // Do flow control analysis on the instructions and build the define-use and use-define chains void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg) { - ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp + ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp - psDUChain.clear(); - psUDChain.clear(); + psDUChain.clear(); + psUDChain.clear(); - for (uint32_t i = 0; i < ui32NumTemps * 4; i++) - { - psUDChain.insert(std::make_pair(i, UseDefineChain())); - psDUChain.insert(std::make_pair(i, DefineUseChain())); - } + for (uint32_t i = 0; i < ui32NumTemps * 4; i++) + { + psUDChain.insert(std::make_pair(i, UseDefineChain())); + psDUChain.insert(std::make_pair(i, DefineUseChain())); + } - const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks(); + const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks(); - // Loop through each block, first calculate the union of all the reachables of all 
preceding blocks - // and then build on that as we go along the basic block instructions + // Loop through each block, first calculate the union of all the reachables of all preceding blocks + // and then build on that as we go along the basic block instructions for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr &bptr) - { - const BasicBlock &b = *bptr.get(); - BasicBlock::ReachableVariables rvars; - for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock) - { - const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock); - BasicBlock::RVarUnion(rvars, b.Reachable()); - }); - - // Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions - for (const Instruction *inst = b.First(); inst <= b.Last(); inst++) - { - // Process sources first - ForEachOperand(inst, inst+1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, - [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - - // Add an use for all visible definitions - psUDChain[regIdx].push_front(UseDefineChainEntry()); - UseDefineChainEntry &ue = *psUDChain[regIdx].begin(); - ue.psInst = (Instruction *)psInst; - ue.psOp = (Operand *)psOperand; - ue.accessMask = accessMask; - ue.index = k; - ue.psSiblings[k] = &ue; - // ue.siblings will be filled out later. 
- - BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx]; - for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def) - { - DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k); - ue.defines.insert(duentry); - duentry->usages.insert(&ue); - }); - } - return; - }); - - // Then the destination operands - ForEachOperand(inst, inst+1, FEO_FLAG_DEST_OPERAND, - [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - - // Overwrite whatever's in rvars; they are killed by this - rvars[regIdx].clear(); - rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand)); - - // Make sure the definition gets created even though it doesn't have any uses at all - // (happens when sampling a texture but not all channels are used etc). - GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k); - - } - return; - }); - } - }); - - // Connect the siblings for all uses and definitions - for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair &udpair) - { - UseDefineChain &ud = udpair.second; - // Clear out the bottom 2 bits to get the actual base reg - uint32_t baseReg = udpair.first & ~(3); - - for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue) - { - ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber); - - // Go through each component - for (int k = 0; k < 4; k++) - { - // Skip components that we don't access, or the one that's our own - if (!(ue.accessMask & (1 << k)) || ue.index == k) - continue; - - // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. 
- UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; }); - ASSERT(siblItr != psUDChain[baseReg + k].end()); - UseDefineChainEntry &sibling = *siblItr; - ue.psSiblings[k] = &sibling; - } - }); - }); - - // Same for definitions - for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair &dupair) - { - DefineUseChain &du = dupair.second; - // Clear out the bottom 2 bits to get the actual base reg - uint32_t baseReg = dupair.first & ~(3); - - for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de) - { - ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber); - - // Go through each component - for (int k = 0; k < 4; k++) - { - // Skip components that we don't access, or the one that's our own - if (!(de.writeMask & (1 << k)) || de.index == k) - continue; - - // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. - DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; }); - ASSERT(siblItr != psDUChain[baseReg + k].end()); - DefineUseChainEntry &sibling = *siblItr; - de.psSiblings[k] = &sibling; - } - }); - }); + { + const BasicBlock &b = *bptr.get(); + BasicBlock::ReachableVariables rvars; + for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock) + { + const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock); + BasicBlock::RVarUnion(rvars, b.Reachable()); + }); + + // Now we have a Reachable set for the beginning of this block in rvars. 
Loop through all instructions and their operands and pick up uses and definitions + for (const Instruction *inst = b.First(); inst <= b.Last(); inst++) + { + // Process sources first + ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, + [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Add an use for all visible definitions + psUDChain[regIdx].push_front(UseDefineChainEntry()); + UseDefineChainEntry &ue = *psUDChain[regIdx].begin(); + ue.psInst = (Instruction *)psInst; + ue.psOp = (Operand *)psOperand; + ue.accessMask = accessMask; + ue.index = k; + ue.psSiblings[k] = &ue; + // ue.siblings will be filled out later. 
+ + BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx]; + for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def) + { + DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k); + ue.defines.insert(duentry); + duentry->usages.insert(&ue); + }); + } + return; + }); + + // Then the destination operands + ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND, + [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Overwrite whatever's in rvars; they are killed by this + rvars[regIdx].clear(); + rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand)); + + // Make sure the definition gets created even though it doesn't have any uses at all + // (happens when sampling a texture but not all channels are used etc). + GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k); + } + return; + }); + } + }); + + // Connect the siblings for all uses and definitions + for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair &udpair) + { + UseDefineChain &ud = udpair.second; + // Clear out the bottom 2 bits to get the actual base reg + uint32_t baseReg = udpair.first & ~(3); + + for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue) + { + ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber); + + // Go through each component + for (int k = 0; k < 4; k++) + { + // Skip components that we don't access, or the one that's our own + if (!(ue.accessMask & (1 << k)) || ue.index == k) + continue; + + // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. 
+ UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; }); + ASSERT(siblItr != psUDChain[baseReg + k].end()); + UseDefineChainEntry &sibling = *siblItr; + ue.psSiblings[k] = &sibling; + } + }); + }); + + // Same for definitions + for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair &dupair) + { + DefineUseChain &du = dupair.second; + // Clear out the bottom 2 bits to get the actual base reg + uint32_t baseReg = dupair.first & ~(3); + + for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de) + { + ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber); + + // Go through each component + for (int k = 0; k < 4; k++) + { + // Skip components that we don't access, or the one that's our own + if (!(de.writeMask & (1 << k)) || de.index == k) + continue; + + // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. + DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; }); + ASSERT(siblItr != psDUChain[baseReg + k].end()); + DefineUseChainEntry &sibling = *siblItr; + de.psSiblings[k] = &sibling; + } + }); + }); #if DEBUG_UDCHAINS - UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions); + UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions); #endif } - typedef std::vector SplitDefinitions; // Split out a define to use a new temp register static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) { - uint32_t newReg = *psNumTemps; - uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber; - uint32_t accessMask = defs[0]->writeMask; - uint32_t i, u32def; - uint32_t rebase, count; - uint32_t splitTableValue; + uint32_t newReg = 
*psNumTemps; + uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber; + uint32_t accessMask = defs[0]->writeMask; + uint32_t i, u32def; + uint32_t rebase, count; + uint32_t splitTableValue; - ASSERT(defs.size() > 0); - for (i = 1; i < defs.size(); i++) - { - ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg); - accessMask |= defs[i]->writeMask; - } + ASSERT(defs.size() > 0); + for (i = 1; i < defs.size(); i++) + { + ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg); + accessMask |= defs[i]->writeMask; + } - (*psNumTemps)++; + (*psNumTemps)++; #if DEBUG_UDCHAINS - UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions()); + UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions()); #endif - ASSERT(accessMask != 0 && accessMask <= 0xf); - // Calculate rebase value and component count - rebase = 0; - count = 0; - i = accessMask; - while ((i & 1) == 0) - { - rebase++; - i = i >> 1; - } - while (i != 0) - { - count++; - i = i >> 1; - } - - // Make sure there's enough room in the split table - if (pui32SplitTable.size() <= newReg) - { - size_t newSize = pui32SplitTable.size() * 2; - pui32SplitTable.resize(newSize, 0xffffffff); - } - - // Set the original temp of the new register - { - uint32_t origTemp = oldReg; - while (pui32SplitTable[origTemp] != 0xffffffff) - origTemp = pui32SplitTable[origTemp] & 0xffff; - - ASSERT(rebase < 4); - ASSERT(count <= 4); - splitTableValue = (count << 24) | (rebase << 16) | origTemp; - - pui32SplitTable[newReg] = splitTableValue; - } - - // Insert the new temps to the map - for (i = newReg * 4; i < newReg * 4 + 4; i++) - { - psUDChains.insert(std::make_pair(i, UseDefineChain())); - psDUChains.insert(std::make_pair(i, DefineUseChain())); - } - - for (u32def = 0; u32def < defs.size(); u32def++) - { - DefineUseChainEntry *defineToSplit = defs[u32def]; - uint32_t oldIdx = defineToSplit->index; + ASSERT(accessMask != 0 && accessMask <= 0xf); + // Calculate rebase value and component count + 
rebase = 0; + count = 0; + i = accessMask; + while ((i & 1) == 0) + { + rebase++; + i = i >> 1; + } + while (i != 0) + { + count++; + i = i >> 1; + } + + // Make sure there's enough room in the split table + if (pui32SplitTable.size() <= newReg) + { + size_t newSize = pui32SplitTable.size() * 2; + pui32SplitTable.resize(newSize, 0xffffffff); + } + + // Set the original temp of the new register + { + uint32_t origTemp = oldReg; + while (pui32SplitTable[origTemp] != 0xffffffff) + origTemp = pui32SplitTable[origTemp] & 0xffff; + + ASSERT(rebase < 4); + ASSERT(count <= 4); + splitTableValue = (count << 24) | (rebase << 16) | origTemp; + + pui32SplitTable[newReg] = splitTableValue; + } + + // Insert the new temps to the map + for (i = newReg * 4; i < newReg * 4 + 4; i++) + { + psUDChains.insert(std::make_pair(i, UseDefineChain())); + psDUChains.insert(std::make_pair(i, DefineUseChain())); + } + + for (u32def = 0; u32def < defs.size(); u32def++) + { + DefineUseChainEntry *defineToSplit = defs[u32def]; + uint32_t oldIdx = defineToSplit->index; #if DEBUG_UDCHAINS - printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count); + printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count); #endif - // We may have moved the opcodes already because of multiple defines pointing to the same op - if (defineToSplit->psOp->ui32RegisterNumber != newReg) - { - ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg); - // Update the declaration operand - // Don't change possible suboperands as they are sources - defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); - } - - defineToSplit->writeMask >>= rebase; - defineToSplit->index -= rebase; - // Change the temp register number for all usages - 
UsageSet::iterator ul = defineToSplit->usages.begin(); - while (ul != defineToSplit->usages.end()) - { - // Already updated by one of the siblings? Skip. - if ((*ul)->psOp->ui32RegisterNumber != newReg) - { - ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg); - (*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); - } - - // Update the UD chain - { - UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin(); - while (udLoc != psUDChains[oldReg * 4 + oldIdx].end()) - { - if (&*udLoc == *ul) - { - // Move to new list - psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc); - - if (rebase > 0) - { - (*ul)->accessMask >>= rebase; - (*ul)->index -= rebase; - memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *)); - } - break; - } - udLoc++; - } - } - - ul++; - } - - // Move the define out of the old chain (if its still there) - { - // Find the define in the old chain - DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin(); - while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit)) - { - duLoc++; - } - ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end()); - { - // Move directly to new chain - psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc); - if (rebase != 0) - { - memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *)); - } - } - - } - - } + // We may have moved the opcodes already because of multiple defines pointing to the same op + if (defineToSplit->psOp->ui32RegisterNumber != newReg) + { + ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg); + // Update the declaration operand + // Don't change possible suboperands as they are sources + 
defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); + } + + defineToSplit->writeMask >>= rebase; + defineToSplit->index -= rebase; + // Change the temp register number for all usages + UsageSet::iterator ul = defineToSplit->usages.begin(); + while (ul != defineToSplit->usages.end()) + { + // Already updated by one of the siblings? Skip. + if ((*ul)->psOp->ui32RegisterNumber != newReg) + { + ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg); + (*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); + } + + // Update the UD chain + { + UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin(); + while (udLoc != psUDChains[oldReg * 4 + oldIdx].end()) + { + if (&*udLoc == *ul) + { + // Move to new list + psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc); + + if (rebase > 0) + { + (*ul)->accessMask >>= rebase; + (*ul)->index -= rebase; + memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *)); + } + break; + } + udLoc++; + } + } + + ul++; + } + + // Move the define out of the old chain (if its still there) + { + // Find the define in the old chain + DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin(); + while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit)) + { + duLoc++; + } + ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end()); + { + // Move directly to new chain + psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc); + if (rebase != 0) + { + memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *)); + } + } + } + } #if DEBUG_UDCHAINS - UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, 
ActiveDefinitions()); + UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions()); #endif } // Adds a define and all its siblings to the list, checking duplicates static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef) { - uint32_t k; - for (k = 0; k < 4; k++) - { - if (newDef->psSiblings[k]) - { - DefineUseChainEntry *defToAdd = newDef->psSiblings[k]; - uint32_t m; - int defFound = 0; - for (m = 0; m < defs.size(); m++) - { - if (defs[m] == defToAdd) - { - defFound = 1; - break; - } - } - if (defFound == 0) - { - defs.push_back(newDef->psSiblings[k]); - } - } - } + uint32_t k; + for (k = 0; k < 4; k++) + { + if (newDef->psSiblings[k]) + { + DefineUseChainEntry *defToAdd = newDef->psSiblings[k]; + uint32_t m; + int defFound = 0; + for (m = 0; m < defs.size(); m++) + { + if (defs[m] == defToAdd) + { + defFound = 1; + break; + } + } + if (defFound == 0) + { + defs.push_back(newDef->psSiblings[k]); + } + } + } } // Check if a set of definitions can be split and does the split. 
Returns nonzero if a split took place static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) { - uint32_t reg; - uint32_t combinedMask; - uint32_t i, k, u32def; - int canSplit = 1; - DefineUseChain::iterator du; - int hasLeftoverDefinitions = 0; - // Initial checks: all definitions must: - // Access the same register - // Have at least one definition in any of the 4 register slots that isn't included - if (defs.empty()) - return 0; - - reg = defs[0]->psOp->ui32RegisterNumber; - combinedMask = defs[0]->writeMask; - for (i = 1; i < defs.size(); i++) - { - if (reg != defs[i]->psOp->ui32RegisterNumber) - return 0; - - combinedMask |= defs[i]->writeMask; - } - for (i = 0; i < 4; i++) - { - du = psDUChains[reg * 4 + i].begin(); - while (du != psDUChains[reg * 4 + i].end()) - { - int defFound = 0; - for (k = 0; k < defs.size(); k++) - { - if (&*du == defs[k]) - { - defFound = 1; - break; - } - } - if (defFound == 0) - { - hasLeftoverDefinitions = 1; - break; - } - du++; - } - if (hasLeftoverDefinitions) - break; - } - // We'd be splitting the entire register and all its definitions, no point in that. - if (hasLeftoverDefinitions == 0) - return 0; - - // Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array. 
- for (u32def = 0; u32def < defs.size(); u32def++) - { - DefineUseChainEntry *def = defs[u32def]; - - UsageSet::iterator ul = def->usages.begin(); - while (ul != def->usages.end()) - { - uint32_t j; - - // Check that we only read a subset of the combined writemask - if (((*ul)->accessMask & (~combinedMask)) != 0) - { - // Do an additional attempt, pick up all the sibling definitions as well - // Only do this if we have the space in the definitions table - for (j = 0; j < 4; j++) - { - if (((*ul)->accessMask & (1 << j)) == 0) - continue; - AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin()); - } - return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - - } - - // It must have at least one declaration - ASSERT(!(*ul)->defines.empty()); - - // Check that all siblings for the usage use one of the definitions - for (j = 0; j < 4; j++) - { - uint32_t m; - int defineFound = 0; - if (((*ul)->accessMask & (1 << j)) == 0) - continue; - - ASSERT((*ul)->psSiblings[j] != NULL); - ASSERT(!(*ul)->psSiblings[j]->defines.empty()); - - // Check that all definitions for this usage are found from the definitions table - DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin(); - while (dl != (*ul)->psSiblings[j]->defines.end()) - { - defineFound = 0; - for (m = 0; m < defs.size(); m++) - { - if (*dl == defs[m]) - { - defineFound = 1; - break; - } - } - if (defineFound == 0) - { - // Add this define and all its siblings to the table and try again - AddDefineToList(defs, *dl); - return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - canSplit = 0; - break; - } - - dl++; - } - - if (defineFound == 0) - { - canSplit = 0; - break; - } - } - if (canSplit == 0) - break; - - // This'll do, check next usage - ul++; - } - if (canSplit == 0) - break; - - } - if (canSplit) - { - UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - return 1; - } - return 0; + uint32_t reg; + uint32_t 
combinedMask; + uint32_t i, k, u32def; + int canSplit = 1; + DefineUseChain::iterator du; + int hasLeftoverDefinitions = 0; + // Initial checks: all definitions must: + // Access the same register + // Have at least one definition in any of the 4 register slots that isn't included + if (defs.empty()) + return 0; + + reg = defs[0]->psOp->ui32RegisterNumber; + combinedMask = defs[0]->writeMask; + for (i = 1; i < defs.size(); i++) + { + if (reg != defs[i]->psOp->ui32RegisterNumber) + return 0; + + combinedMask |= defs[i]->writeMask; + } + for (i = 0; i < 4; i++) + { + du = psDUChains[reg * 4 + i].begin(); + while (du != psDUChains[reg * 4 + i].end()) + { + int defFound = 0; + for (k = 0; k < defs.size(); k++) + { + if (&*du == defs[k]) + { + defFound = 1; + break; + } + } + if (defFound == 0) + { + hasLeftoverDefinitions = 1; + break; + } + du++; + } + if (hasLeftoverDefinitions) + break; + } + // We'd be splitting the entire register and all its definitions, no point in that. + if (hasLeftoverDefinitions == 0) + return 0; + + // Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array. 
+ for (u32def = 0; u32def < defs.size(); u32def++) + { + DefineUseChainEntry *def = defs[u32def]; + + UsageSet::iterator ul = def->usages.begin(); + while (ul != def->usages.end()) + { + uint32_t j; + + // Check that we only read a subset of the combined writemask + if (((*ul)->accessMask & (~combinedMask)) != 0) + { + // Do an additional attempt, pick up all the sibling definitions as well + // Only do this if we have the space in the definitions table + for (j = 0; j < 4; j++) + { + if (((*ul)->accessMask & (1 << j)) == 0) + continue; + AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin()); + } + return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + } + + // It must have at least one declaration + ASSERT(!(*ul)->defines.empty()); + + // Check that all siblings for the usage use one of the definitions + for (j = 0; j < 4; j++) + { + uint32_t m; + int defineFound = 0; + if (((*ul)->accessMask & (1 << j)) == 0) + continue; + + ASSERT((*ul)->psSiblings[j] != NULL); + ASSERT(!(*ul)->psSiblings[j]->defines.empty()); + + // Check that all definitions for this usage are found from the definitions table + DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin(); + while (dl != (*ul)->psSiblings[j]->defines.end()) + { + defineFound = 0; + for (m = 0; m < defs.size(); m++) + { + if (*dl == defs[m]) + { + defineFound = 1; + break; + } + } + if (defineFound == 0) + { + // Add this define and all its siblings to the table and try again + AddDefineToList(defs, *dl); + return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + canSplit = 0; + break; + } + + dl++; + } + + if (defineFound == 0) + { + canSplit = 0; + break; + } + } + if (canSplit == 0) + break; + + // This'll do, check next usage + ul++; + } + if (canSplit == 0) + break; + } + if (canSplit) + { + UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + return 1; + } + return 0; } // Do temp splitting based on 
use-define chains void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) { - // Algorithm overview: - // Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable), - // split it out. - uint32_t i; - uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition - for (i = 0; i < tempsAtStart * 4; i++) - { - // No definitions? - if (psDUChains[i].empty()) - continue; - - DefineUseChain::iterator du = psDUChains[i].begin(); - // Ok we have multiple definitions for a temp, check them through - while (du != psDUChains[i].end()) - { - SplitDefinitions sd; - AddDefineToList(sd, &*du); - du++; - // If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain - if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable)) - { - du = psDUChains[i].begin(); - } - } - } + // Algorithm overview: + // Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable), + // split it out. + uint32_t i; + uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition + for (i = 0; i < tempsAtStart * 4; i++) + { + // No definitions? 
+ if (psDUChains[i].empty()) + continue; + + DefineUseChain::iterator du = psDUChains[i].begin(); + // Ok we have multiple definitions for a temp, check them through + while (du != psDUChains[i].end()) + { + SplitDefinitions sd; + AddDefineToList(sd, &*du); + du++; + // If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain + if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable)) + { + du = psDUChains[i].begin(); + } + } + } } // Returns nonzero if all the operands have partial precision and at least one of them has been downgraded as part of shader downgrading process. // Sampler ops, bitwise ops and comparisons are ignored. static int CanDowngradeDefinitionPrecision(DefineUseChain::iterator du, OPERAND_MIN_PRECISION *pType) { - Instruction *psInst = du->psInst; - int hasFullPrecOperands = 0; - uint32_t i; - - if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) - return 0; - - switch (psInst->eOpcode) - { - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_MOV: - case OPCODE_MAD: - case OPCODE_DIV: - case OPCODE_LOG: - case OPCODE_EXP: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_DP2: - case OPCODE_DP2ADD: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_RSQ: - case OPCODE_SQRT: - break; - default: - return 0; - } - - for (i = psInst->ui32FirstSrc; i < psInst->ui32NumOperands; i++) - { - Operand *op = &psInst->asOperands[i]; - if (op->eType == OPERAND_TYPE_IMMEDIATE32) - continue; // Immediate values are ignored - - if (op->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) - { - hasFullPrecOperands = 1; - break; - } - } - - if (hasFullPrecOperands) - return 0; - - if (pType) - *pType = OPERAND_MIN_PRECISION_FLOAT_16; // Don't go lower than mediump - - return 1; + Instruction *psInst = du->psInst; + int hasFullPrecOperands = 0; + uint32_t i; + + if (du->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + return 0; + + switch (psInst->eOpcode) + { + case 
OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_MOV: + case OPCODE_MAD: + case OPCODE_DIV: + case OPCODE_LOG: + case OPCODE_EXP: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_DP2: + case OPCODE_DP2ADD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_RSQ: + case OPCODE_SQRT: + break; + default: + return 0; + } + + for (i = psInst->ui32FirstSrc; i < psInst->ui32NumOperands; i++) + { + Operand *op = &psInst->asOperands[i]; + if (op->eType == OPERAND_TYPE_IMMEDIATE32) + continue; // Immediate values are ignored + + if (op->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) + { + hasFullPrecOperands = 1; + break; + } + } + + if (hasFullPrecOperands) + return 0; + + if (pType) + *pType = OPERAND_MIN_PRECISION_FLOAT_16; // Don't go lower than mediump + + return 1; } // Returns true if all the usages of this definitions are instructions that deal with floating point data static bool HasOnlyFloatUsages(DefineUseChain::iterator du) { - UsageSet::iterator itr = du->usages.begin(); - for (; itr != du->usages.end(); itr++) - { - Instruction *psInst = (*itr)->psInst; - - if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) - return false; - - switch (psInst->eOpcode) - { - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_MOV: - case OPCODE_MAD: - case OPCODE_DIV: - case OPCODE_LOG: - case OPCODE_EXP: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_DP2: - case OPCODE_DP2ADD: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_RSQ: - case OPCODE_SQRT: - break; - default: - return false; - } - } - return true; + UsageSet::iterator itr = du->usages.begin(); + for (; itr != du->usages.end(); itr++) + { + Instruction *psInst = (*itr)->psInst; + + if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + return false; + + switch (psInst->eOpcode) + { + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_MOV: + case OPCODE_MAD: + case OPCODE_DIV: + case OPCODE_LOG: + case OPCODE_EXP: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_DP2: + case 
OPCODE_DP2ADD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_RSQ: + case OPCODE_SQRT: + break; + default: + return false; + } + } + return true; } // Based on the sampler precisions, downgrade the definitions if possible. void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps) { - uint32_t madeProgress = 0; - do - { - uint32_t i; - madeProgress = 0; - for (i = 0; i < ui32NumTemps * 4; i++) - { - DefineUseChain::iterator du = psDUChains[i].begin(); - while (du != psDUChains[i].end()) - { - OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; - if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) - || CanDowngradeDefinitionPrecision(du, &sType)) - && du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP - && du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT - && du->isStandalone - && HasOnlyFloatUsages(du)) - { - uint32_t sibl; - // Ok we can change the precision. - ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP); - ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT); - du->psOp->eMinPrecision = sType; - - // Update all the uses of all the siblings - for (sibl = 0; sibl < 4; sibl++) - { - if (!du->psSiblings[sibl]) - continue; - - UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); - while (ul != du->psSiblings[sibl]->usages.end()) - { - ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT || - (*ul)->psOp->eMinPrecision == sType); - // We may well write this multiple times to the same op but that's fine. 
- (*ul)->psOp->eMinPrecision = sType; - - ul++; - } - } - madeProgress = 1; - } - du++; - } - } - } while (madeProgress != 0); - + uint32_t madeProgress = 0; + do + { + uint32_t i; + madeProgress = 0; + for (i = 0; i < ui32NumTemps * 4; i++) + { + DefineUseChain::iterator du = psDUChains[i].begin(); + while (du != psDUChains[i].end()) + { + OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; + if ((du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) + || CanDowngradeDefinitionPrecision(du, &sType)) + && du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP + && du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT + && du->isStandalone + && HasOnlyFloatUsages(du)) + { + uint32_t sibl; + // Ok we can change the precision. + ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP); + ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT); + du->psOp->eMinPrecision = sType; + + // Update all the uses of all the siblings + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + + UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); + while (ul != du->psSiblings[sibl]->usages.end()) + { + ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT || + (*ul)->psOp->eMinPrecision == sType); + // We may well write this multiple times to the same op but that's fine. 
+ (*ul)->psOp->eMinPrecision = sType; + + ul++; + } + } + madeProgress = 1; + } + du++; + } + } + } + while (madeProgress != 0); } void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps) { - uint32_t i; - for (i = 0; i < ui32NumTemps * 4; i++) - { - DefineUseChain::iterator du = psDUChains[i].begin(); - while (du != psDUChains[i].end()) - { - uint32_t sibl; - int isStandalone = 1; - if (du->isStandalone) - { - du++; - continue; - } - - for (sibl = 0; sibl < 4; sibl++) - { - if (!du->psSiblings[sibl]) - continue; - - UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); - while (ul != du->psSiblings[sibl]->usages.end()) - { - uint32_t k; - ASSERT(!(*ul)->defines.empty()); - - // Need to check that all the siblings of this usage only see this definition's corresponding sibling - for (k = 0; k < 4; k++) - { - if (!(*ul)->psSiblings[k]) - continue; - - if ((*ul)->psSiblings[k]->defines.size() > 1 - || *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k]) - { - isStandalone = 0; - break; - } - } - if (isStandalone == 0) - break; - - ul++; - } - if (isStandalone == 0) - break; - } - - if (isStandalone) - { - // Yep, mark it - for (sibl = 0; sibl < 4; sibl++) - { - if (!du->psSiblings[sibl]) - continue; - du->psSiblings[sibl]->isStandalone = 1; - } - } - du++; - } - } + uint32_t i; + for (i = 0; i < ui32NumTemps * 4; i++) + { + DefineUseChain::iterator du = psDUChains[i].begin(); + while (du != psDUChains[i].end()) + { + uint32_t sibl; + int isStandalone = 1; + if (du->isStandalone) + { + du++; + continue; + } + + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + + UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); + while (ul != du->psSiblings[sibl]->usages.end()) + { + uint32_t k; + ASSERT(!(*ul)->defines.empty()); + + // Need to check that all the siblings of this usage only see this definition's corresponding sibling + for (k = 0; k < 4; k++) + { + if (!(*ul)->psSiblings[k]) + 
continue; + + if ((*ul)->psSiblings[k]->defines.size() > 1 + || *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k]) + { + isStandalone = 0; + break; + } + } + if (isStandalone == 0) + break; + + ul++; + } + if (isStandalone == 0) + break; + } + + if (isStandalone) + { + // Yep, mark it + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + du->psSiblings[sibl]->isStandalone = 1; + } + } + du++; + } + } } // Write the uses and defines back to Instruction and Operand member lists. void WriteBackUsesAndDefines(DefineUseChains &psDUChains) { - using namespace std; - // Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them - for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr) - { - const DefineUseChain &duChain = itr.second; - for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du) - { - for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage) - { - // Update instruction use list - du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp)); - // And the usage's definition - usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp)); - - }); - }); - }); + using namespace std; + // Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them + for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr) + { + const DefineUseChain &duChain = itr.second; + for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du) + { + for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage) + { + // Update instruction use list + du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp)); + // And the usage's definition + usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp)); + }); + }); + }); } diff --git 
a/src/cbstring/bsafe.c b/src/cbstring/bsafe.c index 2a4cf1f..6503761 100644 --- a/src/cbstring/bsafe.c +++ b/src/cbstring/bsafe.c @@ -1,7 +1,7 @@ /* * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation * for details on usage and license. */ @@ -18,68 +18,70 @@ #include #include "bsafe.h" +#if 0 static int bsafeShouldExit = 1; -#if 0 -char * strcpy (char *dst, const char *src); -char * strcat (char *dst, const char *src); +char * strcpy(char *dst, const char *src); +char * strcat(char *dst, const char *src); -char * strcpy (char *dst, const char *src) { - dst = dst; - src = src; - fprintf (stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +char * strcpy(char *dst, const char *src) +{ + dst = dst; + src = src; + fprintf(stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } -char * strcat (char *dst, const char *src) { - dst = dst; - src = src; - fprintf (stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +char * strcat(char *dst, const char *src) +{ + dst = dst; + src = src; + fprintf(stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } -#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) -char * (gets) (char * buf) { - buf = buf; - fprintf (stderr, "bsafe error: gets() is not safe, use bgets.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +char * (gets)(char * buf) { + buf = 
buf; + fprintf(stderr, "bsafe error: gets() is not safe, use bgets.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } #endif -char * (strncpy) (char *dst, const char *src, size_t n) { - dst = dst; - src = src; - n = n; - fprintf (stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +char * (strncpy)(char *dst, const char *src, size_t n) { + dst = dst; + src = src; + n = n; + fprintf(stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } -char * (strncat) (char *dst, const char *src, size_t n) { - dst = dst; - src = src; - n = n; - fprintf (stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +char * (strncat)(char *dst, const char *src, size_t n) { + dst = dst; + src = src; + n = n; + fprintf(stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } -char * (strtok) (char *s1, const char *s2) { - s1 = s1; - s2 = s2; - fprintf (stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +char * (strtok)(char *s1, const char *s2) { + s1 = s1; + s2 = s2; + fprintf(stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } -char * (strdup) (const char *s) { - s = s; - fprintf (stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n"); - if (bsafeShouldExit) exit (-1); - return NULL; +char * (strdup)(const char *s) { + s = s; + fprintf(stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; } #endif diff --git a/src/cbstring/bsafe.h b/src/cbstring/bsafe.h index eb41ec2..d921917 100644 --- a/src/cbstring/bsafe.h +++ 
b/src/cbstring/bsafe.h @@ -1,7 +1,7 @@ /* * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation * for details on usage and license. */ @@ -21,20 +21,20 @@ extern "C" { #endif -#if !defined (__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) /* This is caught in the linker, so its not necessary for gcc. */ -extern char * (gets) (char * buf); +extern char * (gets)(char * buf); #endif -extern char * (strncpy) (char *dst, const char *src, size_t n); -extern char * (strncat) (char *dst, const char *src, size_t n); -extern char * (strtok) (char *s1, const char *s2); -extern char * (strdup) (const char *s); +extern char * (strncpy)(char *dst, const char *src, size_t n); +extern char * (strncat)(char *dst, const char *src, size_t n); +extern char * (strtok)(char *s1, const char *s2); +extern char * (strdup)(const char *s); #undef strcpy #undef strcat -#define strcpy(a,b) bsafe_strcpy(a,b) -#define strcat(a,b) bsafe_strcat(a,b) +#define strcpy(a, b) bsafe_strcpy(a,b) +#define strcat(a, b) bsafe_strcat(a,b) #ifdef __cplusplus } diff --git a/src/cbstring/bstraux.c b/src/cbstring/bstraux.c index 5d7cb54..34cb3d3 100644 --- a/src/cbstring/bstraux.c +++ b/src/cbstring/bstraux.c @@ -1,7 +1,7 @@ /* * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. 
Refer to the accompanying documentation * for details on usage and license. */ @@ -9,7 +9,7 @@ * bstraux.c * * This file is not necessarily part of the core bstring library itself, but - * is just an auxilliary module which includes miscellaneous or trivial + * is just an auxilliary module which includes miscellaneous or trivial * functions. */ @@ -25,115 +25,126 @@ * * Return with a string of the last n characters of b. */ -bstring bTail (bstring b, int n) { - if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; - if (n >= b->slen) return bstrcpy (b); - return bmidstr (b, b->slen - n, n); +bstring bTail(bstring b, int n) +{ + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy(b); + return bmidstr(b, b->slen - n, n); } /* bstring bHead (bstring b, int n) * * Return with a string of the first n characters of b. */ -bstring bHead (bstring b, int n) { - if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; - if (n >= b->slen) return bstrcpy (b); - return bmidstr (b, 0, n); +bstring bHead(bstring b, int n) +{ + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy(b); + return bmidstr(b, 0, n); } /* int bFill (bstring a, char c, int len) * * Fill a given bstring with the character in parameter c, for a length n. */ -int bFill (bstring b, char c, int len) { - if (b == NULL || len < 0 || (b->mlen < b->slen && b->mlen > 0)) return -__LINE__; - b->slen = 0; - return bsetstr (b, len, NULL, c); +int bFill(bstring b, char c, int len) +{ + if (b == NULL || len < 0 || (b->mlen < b->slen && b->mlen > 0)) return -__LINE__; + b->slen = 0; + return bsetstr(b, len, NULL, c); } /* int bReplicate (bstring b, int n) * * Replicate the contents of b end to end n times and replace it in b. 
*/ -int bReplicate (bstring b, int n) { - return bpattern (b, n * b->slen); +int bReplicate(bstring b, int n) +{ + return bpattern(b, n * b->slen); } /* int bReverse (bstring b) * * Reverse the contents of b in place. */ -int bReverse (bstring b) { -int i, n, m; -unsigned char t; - - if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; - n = b->slen; - if (2 <= n) { - m = ((unsigned)n) >> 1; - n--; - for (i=0; i < m; i++) { - t = b->data[n - i]; - b->data[n - i] = b->data[i]; - b->data[i] = t; - } - } - return 0; +int bReverse(bstring b) +{ + int i, n, m; + unsigned char t; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + n = b->slen; + if (2 <= n) + { + m = ((unsigned)n) >> 1; + n--; + for (i = 0; i < m; i++) + { + t = b->data[n - i]; + b->data[n - i] = b->data[i]; + b->data[i] = t; + } + } + return 0; } /* int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) * - * Insert a repeated sequence of a given character into the string at + * Insert a repeated sequence of a given character into the string at * position pos for a length len. 
*/ -int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) { - if (b == NULL || b->slen < 0 || b->mlen < b->slen || pos < 0 || len <= 0) return -__LINE__; - - if (pos > b->slen - && 0 > bsetstr (b, pos, NULL, fill)) return -__LINE__; - - if (0 > balloc (b, b->slen + len)) return -__LINE__; - if (pos < b->slen) memmove (b->data + pos + len, b->data + pos, b->slen - pos); - memset (b->data + pos, c, len); - b->slen += len; - b->data[b->slen] = (unsigned char) '\0'; - return BSTR_OK; +int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill) +{ + if (b == NULL || b->slen < 0 || b->mlen < b->slen || pos < 0 || len <= 0) return -__LINE__; + + if (pos > b->slen + && 0 > bsetstr(b, pos, NULL, fill)) return -__LINE__; + + if (0 > balloc(b, b->slen + len)) return -__LINE__; + if (pos < b->slen) memmove(b->data + pos + len, b->data + pos, b->slen - pos); + memset(b->data + pos, c, len); + b->slen += len; + b->data[b->slen] = (unsigned char)'\0'; + return BSTR_OK; } /* int bJustifyLeft (bstring b, int space) * * Left justify a string. 
*/ -int bJustifyLeft (bstring b, int space) { -int j, i, s, t; -unsigned char c = (unsigned char) space; - - if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; - if (space != (int) c) return BSTR_OK; - - for (s=j=i=0; i < b->slen; i++) { - t = s; - s = c != (b->data[j] = b->data[i]); - j += (t|s); - } - if (j > 0 && b->data[j-1] == c) j--; - - b->data[j] = (unsigned char) '\0'; - b->slen = j; - return BSTR_OK; +int bJustifyLeft(bstring b, int space) +{ + int j, i, s, t; + unsigned char c = (unsigned char)space; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + if (space != (int)c) return BSTR_OK; + + for (s = j = i = 0; i < b->slen; i++) + { + t = s; + s = c != (b->data[j] = b->data[i]); + j += (t | s); + } + if (j > 0 && b->data[j - 1] == c) j--; + + b->data[j] = (unsigned char)'\0'; + b->slen = j; + return BSTR_OK; } /* int bJustifyRight (bstring b, int width, int space) * * Right justify a string to within a given width. */ -int bJustifyRight (bstring b, int width, int space) { -int ret; - if (width <= 0) return -__LINE__; - if (0 > (ret = bJustifyLeft (b, space))) return ret; - if (b->slen <= width) - return bInsertChrs (b, 0, width - b->slen, (unsigned char) space, (unsigned char) space); - return BSTR_OK; +int bJustifyRight(bstring b, int width, int space) +{ + int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft(b, space))) return ret; + if (b->slen <= width) + return bInsertChrs(b, 0, width - b->slen, (unsigned char)space, (unsigned char)space); + return BSTR_OK; } /* int bJustifyCenter (bstring b, int width, int space) @@ -141,13 +152,14 @@ int ret; * Center a string's non-white space characters to within a given width by * inserting whitespaces at the beginning. 
*/ -int bJustifyCenter (bstring b, int width, int space) { -int ret; - if (width <= 0) return -__LINE__; - if (0 > (ret = bJustifyLeft (b, space))) return ret; - if (b->slen <= width) - return bInsertChrs (b, 0, (width - b->slen + 1) >> 1, (unsigned char) space, (unsigned char) space); - return BSTR_OK; +int bJustifyCenter(bstring b, int width, int space) +{ + int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft(b, space))) return ret; + if (b->slen <= width) + return bInsertChrs(b, 0, (width - b->slen + 1) >> 1, (unsigned char)space, (unsigned char)space); + return BSTR_OK; } /* int bJustifyMargin (bstring b, int width, int space) @@ -156,44 +168,52 @@ int ret; * distributing additional white space between words. If the line is too * long to be margin justified, it is left justified. */ -int bJustifyMargin (bstring b, int width, int space) { -struct bstrList * sl; -int i, l, c; - - if (b == NULL || b->slen < 0 || b->mlen == 0 || b->mlen < b->slen) return -__LINE__; - if (NULL == (sl = bsplit (b, (unsigned char) space))) return -__LINE__; - for (l=c=i=0; i < sl->qty; i++) { - if (sl->entry[i]->slen > 0) { - c ++; - l += sl->entry[i]->slen; - } - } - - if (l + c >= width || c < 2) { - bstrListDestroy (sl); - return bJustifyLeft (b, space); - } - - b->slen = 0; - for (i=0; i < sl->qty; i++) { - if (sl->entry[i]->slen > 0) { - if (b->slen > 0) { - int s = (width - l + (c / 2)) / c; - bInsertChrs (b, b->slen, s, (unsigned char) space, (unsigned char) space); - l += s; - } - bconcat (b, sl->entry[i]); - c--; - if (c <= 0) break; - } - } - - bstrListDestroy (sl); - return BSTR_OK; +int bJustifyMargin(bstring b, int width, int space) +{ + struct bstrList * sl; + int i, l, c; + + if (b == NULL || b->slen < 0 || b->mlen == 0 || b->mlen < b->slen) return -__LINE__; + if (NULL == (sl = bsplit(b, (unsigned char)space))) return -__LINE__; + for (l = c = i = 0; i < sl->qty; i++) + { + if (sl->entry[i]->slen > 0) + { + c++; + l += sl->entry[i]->slen; + } + } 
+ + if (l + c >= width || c < 2) + { + bstrListDestroy(sl); + return bJustifyLeft(b, space); + } + + b->slen = 0; + for (i = 0; i < sl->qty; i++) + { + if (sl->entry[i]->slen > 0) + { + if (b->slen > 0) + { + int s = (width - l + (c / 2)) / c; + bInsertChrs(b, b->slen, s, (unsigned char)space, (unsigned char)space); + l += s; + } + bconcat(b, sl->entry[i]); + c--; + if (c <= 0) break; + } + } + + bstrListDestroy(sl); + return BSTR_OK; } -static size_t readNothing (void *buff, size_t elsize, size_t nelem, void *parm) { - return 0; /* Immediately indicate EOF. */ +static size_t readNothing(void *buff, size_t elsize, size_t nelem, void *parm) +{ + return 0; /* Immediately indicate EOF. */ } /* struct bStream * bsFromBstr (const_bstring b); @@ -201,94 +221,103 @@ static size_t readNothing (void *buff, size_t elsize, size_t nelem, void *parm) * Create a bStream whose contents are a copy of the bstring passed in. * This allows the use of all the bStream APIs with bstrings. */ -struct bStream * bsFromBstr (const_bstring b) { -struct bStream * s = bsopen ((bNread) readNothing, NULL); - bsunread (s, b); /* Push the bstring data into the empty bStream. */ - return s; +struct bStream * bsFromBstr(const_bstring b) +{ + struct bStream * s = bsopen((bNread)readNothing, NULL); + bsunread(s, b); /* Push the bstring data into the empty bStream. 
*/ + return s; } -static size_t readRef (void *buff, size_t elsize, size_t nelem, void *parm) { -struct tagbstring * t = (struct tagbstring *) parm; -size_t tsz = elsize * nelem; - - if (tsz > (size_t) t->slen) tsz = (size_t) t->slen; - if (tsz > 0) { - memcpy (buff, t->data, tsz); - t->slen -= (int) tsz; - t->data += tsz; - return tsz / elsize; - } - return 0; +static size_t readRef(void *buff, size_t elsize, size_t nelem, void *parm) +{ + struct tagbstring * t = (struct tagbstring *)parm; + size_t tsz = elsize * nelem; + + if (tsz > (size_t)t->slen) tsz = (size_t)t->slen; + if (tsz > 0) + { + memcpy(buff, t->data, tsz); + t->slen -= (int)tsz; + t->data += tsz; + return tsz / elsize; + } + return 0; } /* The "by reference" version of the above function. This function puts - * a number of restrictions on the call site (the passed in struct + * a number of restrictions on the call site (the passed in struct * tagbstring *will* be modified by this function, and the source data - * must remain alive and constant for the lifetime of the bStream). + * must remain alive and constant for the lifetime of the bStream). * Hence it is not presented as an extern. */ -static struct bStream * bsFromBstrRef (struct tagbstring * t) { - if (!t) return NULL; - return bsopen ((bNread) readRef, t); +static struct bStream * bsFromBstrRef(struct tagbstring * t) +{ + if (!t) return NULL; + return bsopen((bNread)readRef, t); } /* char * bStr2NetStr (const_bstring b) * - * Convert a bstring to a netstring. See + * Convert a bstring to a netstring. See * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. - * Note: 1) The value returned should be freed with a call to bcstrfree() at - * the point when it will no longer be referenced to avoid a memory + * Note: 1) The value returned should be freed with a call to bcstrfree() at + * the point when it will no longer be referenced to avoid a memory * leak. 
* 2) If the returned value is non-NULL, then it also '\0' terminated * in the character position one past the "," terminator. */ -char * bStr2NetStr (const_bstring b) { -char strnum[sizeof (b->slen) * 3 + 1]; -bstring s; -unsigned char * buff; - - if (b == NULL || b->data == NULL || b->slen < 0) return NULL; - sprintf (strnum, "%d:", b->slen); - if (NULL == (s = bfromcstr (strnum)) - || bconcat (s, b) == BSTR_ERR || bconchar (s, (char) ',') == BSTR_ERR) { - bdestroy (s); - return NULL; - } - buff = s->data; - bcstrfree ((char *) s); - return (char *) buff; +char * bStr2NetStr(const_bstring b) +{ + char strnum[sizeof(b->slen) * 3 + 1]; + bstring s; + unsigned char * buff; + + if (b == NULL || b->data == NULL || b->slen < 0) return NULL; + sprintf(strnum, "%d:", b->slen); + if (NULL == (s = bfromcstr(strnum)) + || bconcat(s, b) == BSTR_ERR || bconchar(s, (char)',') == BSTR_ERR) + { + bdestroy(s); + return NULL; + } + buff = s->data; + bcstrfree((char *)s); + return (char *)buff; } /* bstring bNetStr2Bstr (const char * buf) * - * Convert a netstring to a bstring. See + * Convert a netstring to a bstring. See * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. * Note that the terminating "," *must* be present, however a following '\0' * is *not* required. 
*/ -bstring bNetStr2Bstr (const char * buff) { -int i, x; -bstring b; - if (buff == NULL) return NULL; - x = 0; - for (i=0; buff[i] != ':'; i++) { - unsigned int v = buff[i] - '0'; - if (v > 9 || x > ((INT_MAX - (signed int)v) / 10)) return NULL; - x = (x * 10) + v; - } - - /* This thing has to be properly terminated */ - if (buff[i + 1 + x] != ',') return NULL; - - if (NULL == (b = bfromcstr (""))) return NULL; - if (balloc (b, x + 1) != BSTR_OK) { - bdestroy (b); - return NULL; - } - memcpy (b->data, buff + i + 1, x); - b->data[x] = (unsigned char) '\0'; - b->slen = x; - return b; +bstring bNetStr2Bstr(const char * buff) +{ + int i, x; + bstring b; + if (buff == NULL) return NULL; + x = 0; + for (i = 0; buff[i] != ':'; i++) + { + unsigned int v = buff[i] - '0'; + if (v > 9 || x > ((INT_MAX - (signed int)v) / 10)) return NULL; + x = (x * 10) + v; + } + + /* This thing has to be properly terminated */ + if (buff[i + 1 + x] != ',') return NULL; + + if (NULL == (b = bfromcstr(""))) return NULL; + if (balloc(b, x + 1) != BSTR_OK) + { + bdestroy(b); + return NULL; + } + memcpy(b->data, buff + i + 1, x); + b->data[x] = (unsigned char)'\0'; + b->slen = x; + return b; } static char b64ETable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -297,84 +326,95 @@ static char b64ETable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0 * * Generate a base64 encoding. 
See: RFC1341 */ -bstring bBase64Encode (const_bstring b) { -int i, c0, c1, c2, c3; -bstring out; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - out = bfromcstr (""); - for (i=0; i + 2 < b->slen; i += 3) { - if (i && ((i % 57) == 0)) { - if (bconchar (out, (char) '\015') < 0 || bconchar (out, (char) '\012') < 0) { - bdestroy (out); - return NULL; - } - } - c0 = b->data[i] >> 2; - c1 = ((b->data[i] << 4) | - (b->data[i+1] >> 4)) & 0x3F; - c2 = ((b->data[i+1] << 2) | - (b->data[i+2] >> 6)) & 0x3F; - c3 = b->data[i+2] & 0x3F; - if (bconchar (out, b64ETable[c0]) < 0 || - bconchar (out, b64ETable[c1]) < 0 || - bconchar (out, b64ETable[c2]) < 0 || - bconchar (out, b64ETable[c3]) < 0) { - bdestroy (out); - return NULL; - } - } - - if (i && ((i % 57) == 0)) { - if (bconchar (out, (char) '\015') < 0 || bconchar (out, (char) '\012') < 0) { - bdestroy (out); - return NULL; - } - } - - switch (i + 2 - b->slen) { - case 0: c0 = b->data[i] >> 2; - c1 = ((b->data[i] << 4) | - (b->data[i+1] >> 4)) & 0x3F; - c2 = (b->data[i+1] << 2) & 0x3F; - if (bconchar (out, b64ETable[c0]) < 0 || - bconchar (out, b64ETable[c1]) < 0 || - bconchar (out, b64ETable[c2]) < 0 || - bconchar (out, (char) '=') < 0) { - bdestroy (out); - return NULL; - } - break; - case 1: c0 = b->data[i] >> 2; - c1 = (b->data[i] << 4) & 0x3F; - if (bconchar (out, b64ETable[c0]) < 0 || - bconchar (out, b64ETable[c1]) < 0 || - bconchar (out, (char) '=') < 0 || - bconchar (out, (char) '=') < 0) { - bdestroy (out); - return NULL; - } - break; - case 2: break; - } - - return out; +bstring bBase64Encode(const_bstring b) +{ + int i, c0, c1, c2, c3; + bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + out = bfromcstr(""); + for (i = 0; i + 2 < b->slen; i += 3) + { + if (i && ((i % 57) == 0)) + { + if (bconchar(out, (char)'\015') < 0 || bconchar(out, (char)'\012') < 0) + { + bdestroy(out); + return NULL; + } + } + c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i 
+ 1] >> 4)) & 0x3F; + c2 = ((b->data[i + 1] << 2) | + (b->data[i + 2] >> 6)) & 0x3F; + c3 = b->data[i + 2] & 0x3F; + if (bconchar(out, b64ETable[c0]) < 0 || + bconchar(out, b64ETable[c1]) < 0 || + bconchar(out, b64ETable[c2]) < 0 || + bconchar(out, b64ETable[c3]) < 0) + { + bdestroy(out); + return NULL; + } + } + + if (i && ((i % 57) == 0)) + { + if (bconchar(out, (char)'\015') < 0 || bconchar(out, (char)'\012') < 0) + { + bdestroy(out); + return NULL; + } + } + + switch (i + 2 - b->slen) + { + case 0: c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i + 1] >> 4)) & 0x3F; + c2 = (b->data[i + 1] << 2) & 0x3F; + if (bconchar(out, b64ETable[c0]) < 0 || + bconchar(out, b64ETable[c1]) < 0 || + bconchar(out, b64ETable[c2]) < 0 || + bconchar(out, (char)'=') < 0) + { + bdestroy(out); + return NULL; + } + break; + case 1: c0 = b->data[i] >> 2; + c1 = (b->data[i] << 4) & 0x3F; + if (bconchar(out, b64ETable[c0]) < 0 || + bconchar(out, b64ETable[c1]) < 0 || + bconchar(out, (char)'=') < 0 || + bconchar(out, (char)'=') < 0) + { + bdestroy(out); + return NULL; + } + break; + case 2: break; + } + + return out; } #define B64_PAD (-2) #define B64_ERR (-1) -static int base64DecodeSymbol (unsigned char alpha) { - if ((alpha >= 'A') && (alpha <= 'Z')) return (int)(alpha - 'A'); - else if ((alpha >= 'a') && (alpha <= 'z')) +static int base64DecodeSymbol(unsigned char alpha) +{ + if ((alpha >= 'A') && (alpha <= 'Z')) return (int)(alpha - 'A'); + else if ((alpha >= 'a') && (alpha <= 'z')) return 26 + (int)(alpha - 'a'); - else if ((alpha >= '0') && (alpha <= '9')) + else if ((alpha >= '0') && (alpha <= '9')) return 52 + (int)(alpha - '0'); - else if (alpha == '+') return 62; - else if (alpha == '/') return 63; - else if (alpha == '=') return B64_PAD; - else return B64_ERR; + else if (alpha == '+') return 62; + else if (alpha == '/') return 63; + else if (alpha == '=') return B64_PAD; + else return B64_ERR; } /* bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) @@ 
-382,195 +422,240 @@ static int base64DecodeSymbol (unsigned char alpha) { * Decode a base64 block of data. All MIME headers are assumed to have been * removed. See: RFC1341 */ -bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) { -int i, v; -unsigned char c0, c1, c2; -bstring out; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - if (boolTruncError) *boolTruncError = 0; - out = bfromcstr (""); - i = 0; - for (;;) { - do { - if (i >= b->slen) return out; - if (b->data[i] == '=') { /* Bad "too early" truncation */ - if (boolTruncError) { - *boolTruncError = 1; - return out; - } - bdestroy (out); - return NULL; - } - v = base64DecodeSymbol (b->data[i]); - i++; - } while (v < 0); - c0 = (unsigned char) (v << 2); - do { - if (i >= b->slen || b->data[i] == '=') { /* Bad "too early" truncation */ - if (boolTruncError) { - *boolTruncError = 1; - return out; - } - bdestroy (out); - return NULL; - } - v = base64DecodeSymbol (b->data[i]); - i++; - } while (v < 0); - c0 |= (unsigned char) (v >> 4); - c1 = (unsigned char) (v << 4); - do { - if (i >= b->slen) { - if (boolTruncError) { - *boolTruncError = 1; - return out; - } - bdestroy (out); - return NULL; - } - if (b->data[i] == '=') { - i++; - if (i >= b->slen || b->data[i] != '=' || bconchar (out, c0) < 0) { - if (boolTruncError) { - *boolTruncError = 1; - return out; - } - bdestroy (out); /* Missing "=" at the end. 
*/ - return NULL; - } - return out; - } - v = base64DecodeSymbol (b->data[i]); - i++; - } while (v < 0); - c1 |= (unsigned char) (v >> 2); - c2 = (unsigned char) (v << 6); - do { - if (i >= b->slen) { - if (boolTruncError) { - *boolTruncError = 1; - return out; - } - bdestroy (out); - return NULL; - } - if (b->data[i] == '=') { - if (bconchar (out, c0) < 0 || bconchar (out, c1) < 0) { - if (boolTruncError) { - *boolTruncError = 1; - return out; - } - bdestroy (out); - return NULL; - } - if (boolTruncError) *boolTruncError = 0; - return out; - } - v = base64DecodeSymbol (b->data[i]); - i++; - } while (v < 0); - c2 |= (unsigned char) (v); - if (bconchar (out, c0) < 0 || - bconchar (out, c1) < 0 || - bconchar (out, c2) < 0) { - if (boolTruncError) { - *boolTruncError = -1; - return out; - } - bdestroy (out); - return NULL; - } - } +bstring bBase64DecodeEx(const_bstring b, int * boolTruncError) +{ + int i, v; + unsigned char c0, c1, c2; + bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + if (boolTruncError) *boolTruncError = 0; + out = bfromcstr(""); + i = 0; + for (;;) + { + do + { + if (i >= b->slen) return out; + if (b->data[i] == '=') /* Bad "too early" truncation */ + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c0 = (unsigned char)(v << 2); + do + { + if (i >= b->slen || b->data[i] == '=') /* Bad "too early" truncation */ + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c0 |= (unsigned char)(v >> 4); + c1 = (unsigned char)(v << 4); + do + { + if (i >= b->slen) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + if (b->data[i] == '=') + { + i++; + if (i >= b->slen || b->data[i] != '=' || bconchar(out, c0) < 0) + { + if 
(boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); /* Missing "=" at the end. */ + return NULL; + } + return out; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c1 |= (unsigned char)(v >> 2); + c2 = (unsigned char)(v << 6); + do + { + if (i >= b->slen) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + if (b->data[i] == '=') + { + if (bconchar(out, c0) < 0 || bconchar(out, c1) < 0) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + if (boolTruncError) *boolTruncError = 0; + return out; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c2 |= (unsigned char)(v); + if (bconchar(out, c0) < 0 || + bconchar(out, c1) < 0 || + bconchar(out, c2) < 0) + { + if (boolTruncError) + { + *boolTruncError = -1; + return out; + } + bdestroy(out); + return NULL; + } + } } #define UU_DECODE_BYTE(b) (((b) == (signed int)'`') ? 
0 : (b) - (signed int)' ') -struct bUuInOut { - bstring src, dst; - int * badlines; +struct bUuInOut +{ + bstring src, dst; + int * badlines; }; #define UU_MAX_LINELEN 45 -static int bUuDecLine (void * parm, int ofs, int len) { -struct bUuInOut * io = (struct bUuInOut *) parm; -bstring s = io->src; -bstring t = io->dst; -int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; - - if (len == 0) return 0; - llen = UU_DECODE_BYTE (s->data[ofs]); - ret = 0; - - otlen = t->slen; - - if (((unsigned) llen) > UU_MAX_LINELEN) { ret = -__LINE__; - goto bl; - } - - llen += t->slen; - - for (i=1; i < s->slen && t->slen < llen;i += 4) { - unsigned char outoctet[3]; - c0 = UU_DECODE_BYTE (d0 = (int) bchare (s, i+ofs+0, ' ' - 1)); - c1 = UU_DECODE_BYTE (d1 = (int) bchare (s, i+ofs+1, ' ' - 1)); - c2 = UU_DECODE_BYTE (d2 = (int) bchare (s, i+ofs+2, ' ' - 1)); - c3 = UU_DECODE_BYTE (d3 = (int) bchare (s, i+ofs+3, ' ' - 1)); - - if (((unsigned) (c0|c1) >= 0x40)) { if (!ret) ret = -__LINE__; - if (d0 > 0x60 || (d0 < (' ' - 1) && !isspace (d0)) || - d1 > 0x60 || (d1 < (' ' - 1) && !isspace (d1))) { - t->slen = otlen; - goto bl; - } - c0 = c1 = 0; - } - outoctet[0] = (unsigned char) ((c0 << 2) | ((unsigned) c1 >> 4)); - if (t->slen+1 >= llen) { - if (0 > bconchar (t, (char) outoctet[0])) return -__LINE__; - break; - } - if ((unsigned) c2 >= 0x40) { if (!ret) ret = -__LINE__; - if (d2 > 0x60 || (d2 < (' ' - 1) && !isspace (d2))) { - t->slen = otlen; - goto bl; - } - c2 = 0; - } - outoctet[1] = (unsigned char) ((c1 << 4) | ((unsigned) c2 >> 2)); - if (t->slen+2 >= llen) { - if (0 > bcatblk (t, outoctet, 2)) return -__LINE__; - break; - } - if ((unsigned) c3 >= 0x40) { if (!ret) ret = -__LINE__; - if (d3 > 0x60 || (d3 < (' ' - 1) && !isspace (d3))) { - t->slen = otlen; - goto bl; - } - c3 = 0; - } - outoctet[2] = (unsigned char) ((c2 << 6) | ((unsigned) c3)); - if (0 > bcatblk (t, outoctet, 3)) return -__LINE__; - } - if (t->slen < llen) { if (0 == ret) ret = -__LINE__; - t->slen = 
otlen; - } - bl:; - if (ret && io->badlines) { - (*io->badlines)++; - return 0; - } - return ret; +static int bUuDecLine(void * parm, int ofs, int len) +{ + struct bUuInOut * io = (struct bUuInOut *)parm; + bstring s = io->src; + bstring t = io->dst; + int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; + + if (len == 0) return 0; + llen = UU_DECODE_BYTE(s->data[ofs]); + ret = 0; + + otlen = t->slen; + + if (((unsigned)llen) > UU_MAX_LINELEN) + { + ret = -__LINE__; + goto bl; + } + + llen += t->slen; + + for (i = 1; i < s->slen && t->slen < llen; i += 4) + { + unsigned char outoctet[3]; + c0 = UU_DECODE_BYTE(d0 = (int)bchare(s, i + ofs + 0, ' ' - 1)); + c1 = UU_DECODE_BYTE(d1 = (int)bchare(s, i + ofs + 1, ' ' - 1)); + c2 = UU_DECODE_BYTE(d2 = (int)bchare(s, i + ofs + 2, ' ' - 1)); + c3 = UU_DECODE_BYTE(d3 = (int)bchare(s, i + ofs + 3, ' ' - 1)); + + if (((unsigned)(c0 | c1) >= 0x40)) + { + if (!ret) ret = -__LINE__; + if (d0 > 0x60 || (d0 < (' ' - 1) && !isspace(d0)) || + d1 > 0x60 || (d1 < (' ' - 1) && !isspace(d1))) + { + t->slen = otlen; + goto bl; + } + c0 = c1 = 0; + } + outoctet[0] = (unsigned char)((c0 << 2) | ((unsigned)c1 >> 4)); + if (t->slen + 1 >= llen) + { + if (0 > bconchar(t, (char)outoctet[0])) return -__LINE__; + break; + } + if ((unsigned)c2 >= 0x40) + { + if (!ret) ret = -__LINE__; + if (d2 > 0x60 || (d2 < (' ' - 1) && !isspace(d2))) + { + t->slen = otlen; + goto bl; + } + c2 = 0; + } + outoctet[1] = (unsigned char)((c1 << 4) | ((unsigned)c2 >> 2)); + if (t->slen + 2 >= llen) + { + if (0 > bcatblk(t, outoctet, 2)) return -__LINE__; + break; + } + if ((unsigned)c3 >= 0x40) + { + if (!ret) ret = -__LINE__; + if (d3 > 0x60 || (d3 < (' ' - 1) && !isspace(d3))) + { + t->slen = otlen; + goto bl; + } + c3 = 0; + } + outoctet[2] = (unsigned char)((c2 << 6) | ((unsigned)c3)); + if (0 > bcatblk(t, outoctet, 3)) return -__LINE__; + } + if (t->slen < llen) + { + if (0 == ret) ret = -__LINE__; + t->slen = otlen; + } +bl:; + if (ret && io->badlines) + { + 
(*io->badlines)++; + return 0; + } + return ret; } /* bstring bUuDecodeEx (const_bstring src, int * badlines) * * Performs a UUDecode of a block of data. If there are errors in the * decoding, they are counted up and returned in "badlines", if badlines is - * not NULL. It is assumed that the "begin" and "end" lines have already - * been stripped off. The potential security problem of writing the - * filename in the begin line is something that is beyond the scope of a + * not NULL. It is assumed that the "begin" and "end" lines have already + * been stripped off. The potential security problem of writing the + * filename in the begin line is something that is beyond the scope of a * portable library. */ @@ -578,302 +663,341 @@ int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; #pragma warning(disable:4204) #endif -bstring bUuDecodeEx (const_bstring src, int * badlines) { -struct tagbstring t; -struct bStream * s; -struct bStream * d; -bstring b; - - if (!src) return NULL; - t = *src; /* Short lifetime alias to header of src */ - s = bsFromBstrRef (&t); /* t is undefined after this */ - if (!s) return NULL; - d = bsUuDecode (s, badlines); - b = bfromcstralloc (256, ""); - if (NULL == b || 0 > bsread (b, d, INT_MAX)) { - bdestroy (b); - bsclose (d); - bsclose (s); - return NULL; - } - return b; +bstring bUuDecodeEx(const_bstring src, int * badlines) +{ + struct tagbstring t; + struct bStream * s; + struct bStream * d; + bstring b; + + if (!src) return NULL; + t = *src; /* Short lifetime alias to header of src */ + s = bsFromBstrRef(&t); /* t is undefined after this */ + if (!s) return NULL; + d = bsUuDecode(s, badlines); + b = bfromcstralloc(256, ""); + if (NULL == b || 0 > bsread(b, d, INT_MAX)) + { + bdestroy(b); + bsclose(d); + bsclose(s); + return NULL; + } + return b; } -struct bsUuCtx { - struct bUuInOut io; - struct bStream * sInp; +struct bsUuCtx +{ + struct bUuInOut io; + struct bStream * sInp; }; -static size_t bsUuDecodePart (void *buff, size_t 
elsize, size_t nelem, void *parm) { -static struct tagbstring eol = bsStatic ("\r\n"); -struct bsUuCtx * luuCtx = (struct bsUuCtx *) parm; -size_t tsz; -int l, lret; - - if (NULL == buff || NULL == parm) return 0; - tsz = elsize * nelem; - - CheckInternalBuffer:; - /* If internal buffer has sufficient data, just output it */ - if (((size_t) luuCtx->io.dst->slen) > tsz) { - memcpy (buff, luuCtx->io.dst->data, tsz); - bdelete (luuCtx->io.dst, 0, (int) tsz); - return nelem; - } - - DecodeMore:; - if (0 <= (l = binchr (luuCtx->io.src, 0, &eol))) { - int ol = 0; - struct tagbstring t; - bstring s = luuCtx->io.src; - luuCtx->io.src = &t; - - do { - if (l > ol) { - bmid2tbstr (t, s, ol, l - ol); - lret = bUuDecLine (&luuCtx->io, 0, t.slen); - if (0 > lret) { - luuCtx->io.src = s; - goto Done; - } - } - ol = l + 1; - if (((size_t) luuCtx->io.dst->slen) > tsz) break; - l = binchr (s, ol, &eol); - } while (BSTR_ERR != l); - bdelete (s, 0, ol); - luuCtx->io.src = s; - goto CheckInternalBuffer; - } - - if (BSTR_ERR != bsreada (luuCtx->io.src, luuCtx->sInp, bsbufflength (luuCtx->sInp, BSTR_BS_BUFF_LENGTH_GET))) { - goto DecodeMore; - } - - bUuDecLine (&luuCtx->io, 0, luuCtx->io.src->slen); - - Done:; - /* Output any lingering data that has been translated */ - if (((size_t) luuCtx->io.dst->slen) > 0) { - if (((size_t) luuCtx->io.dst->slen) > tsz) goto CheckInternalBuffer; - memcpy (buff, luuCtx->io.dst->data, luuCtx->io.dst->slen); - tsz = luuCtx->io.dst->slen / elsize; - luuCtx->io.dst->slen = 0; - if (tsz > 0) return tsz; - } - - /* Deallocate once EOF becomes triggered */ - bdestroy (luuCtx->io.dst); - bdestroy (luuCtx->io.src); - free (luuCtx); - return 0; +static size_t bsUuDecodePart(void *buff, size_t elsize, size_t nelem, void *parm) +{ + static struct tagbstring eol = bsStatic("\r\n"); + struct bsUuCtx * luuCtx = (struct bsUuCtx *)parm; + size_t tsz; + int l, lret; + + if (NULL == buff || NULL == parm) return 0; + tsz = elsize * nelem; + +CheckInternalBuffer:; + /* If 
internal buffer has sufficient data, just output it */ + if (((size_t)luuCtx->io.dst->slen) > tsz) + { + memcpy(buff, luuCtx->io.dst->data, tsz); + bdelete(luuCtx->io.dst, 0, (int)tsz); + return nelem; + } + +DecodeMore:; + if (0 <= (l = binchr(luuCtx->io.src, 0, &eol))) + { + int ol = 0; + struct tagbstring t; + bstring s = luuCtx->io.src; + luuCtx->io.src = &t; + + do + { + if (l > ol) + { + bmid2tbstr(t, s, ol, l - ol); + lret = bUuDecLine(&luuCtx->io, 0, t.slen); + if (0 > lret) + { + luuCtx->io.src = s; + goto Done; + } + } + ol = l + 1; + if (((size_t)luuCtx->io.dst->slen) > tsz) break; + l = binchr(s, ol, &eol); + } + while (BSTR_ERR != l); + bdelete(s, 0, ol); + luuCtx->io.src = s; + goto CheckInternalBuffer; + } + + if (BSTR_ERR != bsreada(luuCtx->io.src, luuCtx->sInp, bsbufflength(luuCtx->sInp, BSTR_BS_BUFF_LENGTH_GET))) + { + goto DecodeMore; + } + + bUuDecLine(&luuCtx->io, 0, luuCtx->io.src->slen); + +Done:; + /* Output any lingering data that has been translated */ + if (((size_t)luuCtx->io.dst->slen) > 0) + { + if (((size_t)luuCtx->io.dst->slen) > tsz) goto CheckInternalBuffer; + memcpy(buff, luuCtx->io.dst->data, luuCtx->io.dst->slen); + tsz = luuCtx->io.dst->slen / elsize; + luuCtx->io.dst->slen = 0; + if (tsz > 0) return tsz; + } + + /* Deallocate once EOF becomes triggered */ + bdestroy(luuCtx->io.dst); + bdestroy(luuCtx->io.src); + free(luuCtx); + return 0; } /* bStream * bsUuDecode (struct bStream * sInp, int * badlines) * * Creates a bStream which performs the UUDecode of an an input stream. If - * there are errors in the decoding, they are counted up and returned in - * "badlines", if badlines is not NULL. It is assumed that the "begin" and - * "end" lines have already been stripped off. The potential security - * problem of writing the filename in the begin line is something that is + * there are errors in the decoding, they are counted up and returned in + * "badlines", if badlines is not NULL. 
It is assumed that the "begin" and + * "end" lines have already been stripped off. The potential security + * problem of writing the filename in the begin line is something that is * beyond the scope of a portable library. */ -struct bStream * bsUuDecode (struct bStream * sInp, int * badlines) { -struct bsUuCtx * luuCtx = (struct bsUuCtx *) malloc (sizeof (struct bsUuCtx)); -struct bStream * sOut; - - if (NULL == luuCtx) return NULL; - - luuCtx->io.src = bfromcstr (""); - luuCtx->io.dst = bfromcstr (""); - if (NULL == luuCtx->io.dst || NULL == luuCtx->io.src) { - CleanUpFailureToAllocate:; - bdestroy (luuCtx->io.dst); - bdestroy (luuCtx->io.src); - free (luuCtx); - return NULL; - } - luuCtx->io.badlines = badlines; - if (badlines) *badlines = 0; - - luuCtx->sInp = sInp; - - sOut = bsopen ((bNread) bsUuDecodePart, luuCtx); - if (NULL == sOut) goto CleanUpFailureToAllocate; - return sOut; +struct bStream * bsUuDecode(struct bStream * sInp, int * badlines) +{ + struct bsUuCtx * luuCtx = (struct bsUuCtx *)malloc(sizeof(struct bsUuCtx)); + struct bStream * sOut; + + if (NULL == luuCtx) return NULL; + + luuCtx->io.src = bfromcstr(""); + luuCtx->io.dst = bfromcstr(""); + if (NULL == luuCtx->io.dst || NULL == luuCtx->io.src) + { + CleanUpFailureToAllocate :; + bdestroy(luuCtx->io.dst); + bdestroy(luuCtx->io.src); + free(luuCtx); + return NULL; + } + luuCtx->io.badlines = badlines; + if (badlines) *badlines = 0; + + luuCtx->sInp = sInp; + + sOut = bsopen((bNread)bsUuDecodePart, luuCtx); + if (NULL == sOut) goto CleanUpFailureToAllocate; + return sOut; } #define UU_ENCODE_BYTE(b) (char) (((b) == 0) ? '`' : ((b) + ' ')) /* bstring bUuEncode (const_bstring src) * - * Performs a UUEncode of a block of data. The "begin" and "end" lines are + * Performs a UUEncode of a block of data. The "begin" and "end" lines are * not appended. 
*/ -bstring bUuEncode (const_bstring src) { -bstring out; -int i, j, jm; -unsigned int c0, c1, c2; - if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; - if ((out = bfromcstr ("")) == NULL) return NULL; - for (i=0; i < src->slen; i += UU_MAX_LINELEN) { - if ((jm = i + UU_MAX_LINELEN) > src->slen) jm = src->slen; - if (bconchar (out, UU_ENCODE_BYTE (jm - i)) < 0) { - bstrFree (out); - break; - } - for (j = i; j < jm; j += 3) { - c0 = (unsigned int) bchar (src, j ); - c1 = (unsigned int) bchar (src, j + 1); - c2 = (unsigned int) bchar (src, j + 2); - if (bconchar (out, UU_ENCODE_BYTE ( (c0 & 0xFC) >> 2)) < 0 || - bconchar (out, UU_ENCODE_BYTE (((c0 & 0x03) << 4) | ((c1 & 0xF0) >> 4))) < 0 || - bconchar (out, UU_ENCODE_BYTE (((c1 & 0x0F) << 2) | ((c2 & 0xC0) >> 6))) < 0 || - bconchar (out, UU_ENCODE_BYTE ( (c2 & 0x3F))) < 0) { - bstrFree (out); - goto End; - } - } - if (bconchar (out, (char) '\r') < 0 || bconchar (out, (char) '\n') < 0) { - bstrFree (out); - break; - } - } - End:; - return out; +bstring bUuEncode(const_bstring src) +{ + bstring out; + int i, j, jm; + unsigned int c0, c1, c2; + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr("")) == NULL) return NULL; + for (i = 0; i < src->slen; i += UU_MAX_LINELEN) + { + if ((jm = i + UU_MAX_LINELEN) > src->slen) jm = src->slen; + if (bconchar(out, UU_ENCODE_BYTE(jm - i)) < 0) + { + bstrFree(out); + break; + } + for (j = i; j < jm; j += 3) + { + c0 = (unsigned int)bchar(src, j); + c1 = (unsigned int)bchar(src, j + 1); + c2 = (unsigned int)bchar(src, j + 2); + if (bconchar(out, UU_ENCODE_BYTE((c0 & 0xFC) >> 2)) < 0 || + bconchar(out, UU_ENCODE_BYTE(((c0 & 0x03) << 4) | ((c1 & 0xF0) >> 4))) < 0 || + bconchar(out, UU_ENCODE_BYTE(((c1 & 0x0F) << 2) | ((c2 & 0xC0) >> 6))) < 0 || + bconchar(out, UU_ENCODE_BYTE((c2 & 0x3F))) < 0) + { + bstrFree(out); + goto End; + } + } + if (bconchar(out, (char)'\r') < 0 || bconchar(out, (char)'\n') < 0) + { + bstrFree(out); + 
break; + } + } +End:; + return out; } /* bstring bYEncode (const_bstring src) * - * Performs a YEncode of a block of data. No header or tail info is - * appended. See: http://www.yenc.org/whatis.htm and + * Performs a YEncode of a block of data. No header or tail info is + * appended. See: http://www.yenc.org/whatis.htm and * http://www.yenc.org/yenc-draft.1.3.txt */ -bstring bYEncode (const_bstring src) { -int i; -bstring out; -unsigned char c; - - if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; - if ((out = bfromcstr ("")) == NULL) return NULL; - for (i=0; i < src->slen; i++) { - c = (unsigned char)(src->data[i] + 42); - if (c == '=' || c == '\0' || c == '\r' || c == '\n') { - if (0 > bconchar (out, (char) '=')) { - bdestroy (out); - return NULL; - } - c += (unsigned char) 64; - } - if (0 > bconchar (out, c)) { - bdestroy (out); - return NULL; - } - } - return out; +bstring bYEncode(const_bstring src) +{ + int i; + bstring out; + unsigned char c; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr("")) == NULL) return NULL; + for (i = 0; i < src->slen; i++) + { + c = (unsigned char)(src->data[i] + 42); + if (c == '=' || c == '\0' || c == '\r' || c == '\n') + { + if (0 > bconchar(out, (char)'=')) + { + bdestroy(out); + return NULL; + } + c += (unsigned char)64; + } + if (0 > bconchar(out, c)) + { + bdestroy(out); + return NULL; + } + } + return out; } /* bstring bYDecode (const_bstring src) * - * Performs a YDecode of a block of data. See: + * Performs a YDecode of a block of data. 
See: * http://www.yenc.org/whatis.htm and http://www.yenc.org/yenc-draft.1.3.txt */ #define MAX_OB_LEN (64) -bstring bYDecode (const_bstring src) { -int i; -bstring out; -unsigned char c; -unsigned char octetbuff[MAX_OB_LEN]; -int obl; - - if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; - if ((out = bfromcstr ("")) == NULL) return NULL; - - obl = 0; - - for (i=0; i < src->slen; i++) { - if ('=' == (c = src->data[i])) { /* The = escape mode */ - i++; - if (i >= src->slen) { - bdestroy (out); - return NULL; - } - c = (unsigned char) (src->data[i] - 64); - } else { - if ('\0' == c) { - bdestroy (out); - return NULL; - } - - /* Extraneous CR/LFs are to be ignored. */ - if (c == '\r' || c == '\n') continue; - } - - octetbuff[obl] = (unsigned char) ((int) c - 42); - obl++; - - if (obl >= MAX_OB_LEN) { - if (0 > bcatblk (out, octetbuff, obl)) { - bdestroy (out); - return NULL; - } - obl = 0; - } - } - - if (0 > bcatblk (out, octetbuff, obl)) { - bdestroy (out); - out = NULL; - } - return out; +bstring bYDecode(const_bstring src) +{ + int i; + bstring out; + unsigned char c; + unsigned char octetbuff[MAX_OB_LEN]; + int obl; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr("")) == NULL) return NULL; + + obl = 0; + + for (i = 0; i < src->slen; i++) + { + if ('=' == (c = src->data[i])) /* The = escape mode */ + { + i++; + if (i >= src->slen) + { + bdestroy(out); + return NULL; + } + c = (unsigned char)(src->data[i] - 64); + } + else + { + if ('\0' == c) + { + bdestroy(out); + return NULL; + } + + /* Extraneous CR/LFs are to be ignored. 
*/ + if (c == '\r' || c == '\n') continue; + } + + octetbuff[obl] = (unsigned char)((int)c - 42); + obl++; + + if (obl >= MAX_OB_LEN) + { + if (0 > bcatblk(out, octetbuff, obl)) + { + bdestroy(out); + return NULL; + } + obl = 0; + } + } + + if (0 > bcatblk(out, octetbuff, obl)) + { + bdestroy(out); + out = NULL; + } + return out; } /* bstring bStrfTime (const char * fmt, const struct tm * timeptr) * * Takes a format string that is compatible with strftime and a struct tm * pointer, formats the time according to the format string and outputs - * the bstring as a result. Note that if there is an early generation of a + * the bstring as a result. Note that if there is an early generation of a * '\0' character, the bstring will be truncated to this end point. */ -bstring bStrfTime (const char * fmt, const struct tm * timeptr) { -#if defined (__TURBOC__) && !defined (__BORLANDC__) -static struct tagbstring ns = bsStatic ("bStrfTime Not supported"); - fmt = fmt; - timeptr = timeptr; - return &ns; +bstring bStrfTime(const char * fmt, const struct tm * timeptr) +{ +#if defined(__TURBOC__) && !defined(__BORLANDC__) + static struct tagbstring ns = bsStatic("bStrfTime Not supported"); + fmt = fmt; + timeptr = timeptr; + return &ns; #else -bstring buff; -int n; -size_t r; + bstring buff; + int n; + size_t r; - if (fmt == NULL) return NULL; + if (fmt == NULL) return NULL; - /* Since the length is not determinable beforehand, a search is - performed using the truncating "strftime" call on increasing - potential sizes for the output result. */ + /* Since the length is not determinable beforehand, a search is + performed using the truncating "strftime" call on increasing + potential sizes for the output result. 
*/ - if ((n = (int) (2*strlen (fmt))) < 16) n = 16; - buff = bfromcstralloc (n+2, ""); + if ((n = (int)(2 * strlen(fmt))) < 16) n = 16; + buff = bfromcstralloc(n + 2, ""); - for (;;) { - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return NULL; - } + for (;;) + { + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return NULL; + } - r = strftime ((char *) buff->data, n + 1, fmt, timeptr); + r = strftime((char *)buff->data, n + 1, fmt, timeptr); - if (r > 0) { - buff->slen = (int) r; - break; - } + if (r > 0) + { + buff->slen = (int)r; + break; + } - n += n; - } + n += n; + } - return buff; + return buff; #endif } @@ -882,23 +1006,25 @@ size_t r; * Sets the character at position pos to the character c in the bstring a. * If the character c is NUL ('\0') then the string is truncated at this * point. Note: this does not enable any other '\0' character in the bstring - * as terminator indicator for the string. pos must be in the position + * as terminator indicator for the string. pos must be in the position * between 0 and b->slen inclusive, otherwise BSTR_ERR will be returned. 
*/ -int bSetCstrChar (bstring b, int pos, char c) { - if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) - return BSTR_ERR; - if (pos < 0 || pos > b->slen) return BSTR_ERR; - - if (pos == b->slen) { - if ('\0' != c) return bconchar (b, c); - return 0; - } - - b->data[pos] = (unsigned char) c; - if ('\0' == c) b->slen = pos; - - return 0; +int bSetCstrChar(bstring b, int pos, char c) +{ + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) + { + if ('\0' != c) return bconchar(b, c); + return 0; + } + + b->data[pos] = (unsigned char)c; + if ('\0' == c) b->slen = pos; + + return 0; } /* int bSetChar (bstring b, int pos, char c) @@ -908,22 +1034,24 @@ int bSetCstrChar (bstring b, int pos, char c) { * be in the position between 0 and b->slen inclusive, otherwise BSTR_ERR * will be returned. */ -int bSetChar (bstring b, int pos, char c) { - if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) - return BSTR_ERR; - if (pos < 0 || pos > b->slen) return BSTR_ERR; - - if (pos == b->slen) { - return bconchar (b, c); - } - - b->data[pos] = (unsigned char) c; - return 0; +int bSetChar(bstring b, int pos, char c) +{ + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) + { + return bconchar(b, c); + } + + b->data[pos] = (unsigned char)c; + return 0; } #define INIT_SECURE_INPUT_LENGTH (256) -/* bstring bSecureInput (int maxlen, int termchar, +/* bstring bSecureInput (int maxlen, int termchar, * bNgetc vgetchar, void * vgcCtx) * * Read input from an abstracted input interface, for a length of at most @@ -932,49 +1060,52 @@ int bSetChar (bstring b, int pos, char c) { * or the user specified value termchar. 
* */ -bstring bSecureInput (int maxlen, int termchar, bNgetc vgetchar, void * vgcCtx) { -int i, m, c; -bstring b, t; - - if (!vgetchar) return NULL; - - b = bfromcstralloc (INIT_SECURE_INPUT_LENGTH, ""); - if ((c = UCHAR_MAX + 1) == termchar) c++; - - for (i=0; ; i++) { - if (termchar == c || (maxlen > 0 && i >= maxlen)) c = EOF; - else c = vgetchar (vgcCtx); - - if (EOF == c) break; - - if (i+1 >= b->mlen) { - - /* Double size, but deal with unusual case of numeric - overflows */ - - if ((m = b->mlen << 1) <= b->mlen && - (m = b->mlen + 1024) <= b->mlen && - (m = b->mlen + 16) <= b->mlen && - (m = b->mlen + 1) <= b->mlen) t = NULL; - else t = bfromcstralloc (m, ""); - - if (t) memcpy (t->data, b->data, i); - bSecureDestroy (b); /* Cleanse previous buffer */ - b = t; - if (!b) return b; - } - - b->data[i] = (unsigned char) c; - } - - b->slen = i; - b->data[i] = (unsigned char) '\0'; - return b; +bstring bSecureInput(int maxlen, int termchar, bNgetc vgetchar, void * vgcCtx) +{ + int i, m, c; + bstring b, t; + + if (!vgetchar) return NULL; + + b = bfromcstralloc(INIT_SECURE_INPUT_LENGTH, ""); + if ((c = UCHAR_MAX + 1) == termchar) c++; + + for (i = 0;; i++) + { + if (termchar == c || (maxlen > 0 && i >= maxlen)) c = EOF; + else c = vgetchar(vgcCtx); + + if (EOF == c) break; + + if (i + 1 >= b->mlen) + { + /* Double size, but deal with unusual case of numeric + overflows */ + + if ((m = b->mlen << 1) <= b->mlen && + (m = b->mlen + 1024) <= b->mlen && + (m = b->mlen + 16) <= b->mlen && + (m = b->mlen + 1) <= b->mlen) t = NULL; + else t = bfromcstralloc(m, ""); + + if (t) memcpy(t->data, b->data, i); + bSecureDestroy(b); /* Cleanse previous buffer */ + b = t; + if (!b) return b; + } + + b->data[i] = (unsigned char)c; + } + + b->slen = i; + b->data[i] = (unsigned char)'\0'; + return b; } #define BWS_BUFF_SZ (1024) -struct bwriteStream { +struct bwriteStream +{ bstring buff; /* Buffer for underwrites */ void * parm; /* The stream handle for core stream */ bNwrite writeFn; 
/* fwrite work-a-like fnptr for core stream */ @@ -984,48 +1115,54 @@ struct bwriteStream { /* struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) * - * Wrap a given open stream (described by a fwrite work-a-like function + * Wrap a given open stream (described by a fwrite work-a-like function * pointer and stream handle) into an open bwriteStream suitable for write * streaming functions. */ -struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) { -struct bwriteStream * ws; - - if (NULL == writeFn) return NULL; - ws = (struct bwriteStream *) malloc (sizeof (struct bwriteStream)); - if (ws) { - if (NULL == (ws->buff = bfromcstr (""))) { - free (ws); - ws = NULL; - } else { - ws->parm = parm; - ws->writeFn = writeFn; - ws->isEOF = 0; - ws->minBuffSz = BWS_BUFF_SZ; - } - } - return ws; +struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm) +{ + struct bwriteStream * ws; + + if (NULL == writeFn) return NULL; + ws = (struct bwriteStream *)malloc(sizeof(struct bwriteStream)); + if (ws) + { + if (NULL == (ws->buff = bfromcstr(""))) + { + free(ws); + ws = NULL; + } + else + { + ws->parm = parm; + ws->writeFn = writeFn; + ws->isEOF = 0; + ws->minBuffSz = BWS_BUFF_SZ; + } + } + return ws; } -#define internal_bwswriteout(ws,b) { \ - if ((b)->slen > 0) { \ - if (1 != (ws->writeFn ((b)->data, (b)->slen, 1, ws->parm))) { \ - ws->isEOF = 1; \ - return BSTR_ERR; \ - } \ - } \ +#define internal_bwswriteout(ws, b) {\ + if ((b)->slen > 0) { \ + if (1 != (ws->writeFn ((b)->data, (b)->slen, 1, ws->parm))) { \ + ws->isEOF = 1; \ + return BSTR_ERR; \ + } \ + } \ } /* int bwsWriteFlush (struct bwriteStream * ws) * * Force any pending data to be written to the core stream. 
*/ -int bwsWriteFlush (struct bwriteStream * ws) { - if (NULL == ws || ws->isEOF || 0 >= ws->minBuffSz || - NULL == ws->writeFn || NULL == ws->buff) return BSTR_ERR; - internal_bwswriteout (ws, ws->buff); - ws->buff->slen = 0; - return 0; +int bwsWriteFlush(struct bwriteStream * ws) +{ + if (NULL == ws || ws->isEOF || 0 >= ws->minBuffSz || + NULL == ws->writeFn || NULL == ws->buff) return BSTR_ERR; + internal_bwswriteout(ws, ws->buff); + ws->buff->slen = 0; + return 0; } /* int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) @@ -1034,96 +1171,103 @@ int bwsWriteFlush (struct bwriteStream * ws) { * returned. Note that there is no deterministic way to determine the exact * cut off point where the core stream stopped accepting data. */ -int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) { -struct tagbstring t; -int l; - - if (NULL == ws || NULL == b || NULL == ws->buff || - ws->isEOF || 0 >= ws->minBuffSz || NULL == ws->writeFn) - return BSTR_ERR; - - /* Buffer prepacking optimization */ - if (b->slen > 0 && ws->buff->mlen - ws->buff->slen > b->slen) { - static struct tagbstring empty = bsStatic (""); - if (0 > bconcat (ws->buff, b)) return BSTR_ERR; - return bwsWriteBstr (ws, &empty); - } - - if (0 > (l = ws->minBuffSz - ws->buff->slen)) { - internal_bwswriteout (ws, ws->buff); - ws->buff->slen = 0; - l = ws->minBuffSz; - } - - if (b->slen < l) return bconcat (ws->buff, b); - - if (0 > bcatblk (ws->buff, b->data, l)) return BSTR_ERR; - internal_bwswriteout (ws, ws->buff); - ws->buff->slen = 0; - - bmid2tbstr (t, (bstring) b, l, b->slen); - - if (t.slen >= ws->minBuffSz) { - internal_bwswriteout (ws, &t); - return 0; - } - - return bassign (ws->buff, &t); +int bwsWriteBstr(struct bwriteStream * ws, const_bstring b) +{ + struct tagbstring t; + int l; + + if (NULL == ws || NULL == b || NULL == ws->buff || + ws->isEOF || 0 >= ws->minBuffSz || NULL == ws->writeFn) + return BSTR_ERR; + + /* Buffer prepacking optimization */ + if (b->slen > 0 && 
ws->buff->mlen - ws->buff->slen > b->slen) + { + static struct tagbstring empty = bsStatic(""); + if (0 > bconcat(ws->buff, b)) return BSTR_ERR; + return bwsWriteBstr(ws, &empty); + } + + if (0 > (l = ws->minBuffSz - ws->buff->slen)) + { + internal_bwswriteout(ws, ws->buff); + ws->buff->slen = 0; + l = ws->minBuffSz; + } + + if (b->slen < l) return bconcat(ws->buff, b); + + if (0 > bcatblk(ws->buff, b->data, l)) return BSTR_ERR; + internal_bwswriteout(ws, ws->buff); + ws->buff->slen = 0; + + bmid2tbstr(t, (bstring)b, l, b->slen); + + if (t.slen >= ws->minBuffSz) + { + internal_bwswriteout(ws, &t); + return 0; + } + + return bassign(ws->buff, &t); } /* int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) * - * Send a block of data a bwriteStream. If the stream is at EOF BSTR_ERR is + * Send a block of data a bwriteStream. If the stream is at EOF BSTR_ERR is * returned. */ -int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) { -struct tagbstring t; - if (NULL == blk || len < 0) return BSTR_ERR; - blk2tbstr (t, blk, len); - return bwsWriteBstr (ws, &t); +int bwsWriteBlk(struct bwriteStream * ws, void * blk, int len) +{ + struct tagbstring t; + if (NULL == blk || len < 0) return BSTR_ERR; + blk2tbstr(t, blk, len); + return bwsWriteBstr(ws, &t); } /* int bwsIsEOF (const struct bwriteStream * ws) * - * Returns 0 if the stream is currently writable, 1 if the core stream has + * Returns 0 if the stream is currently writable, 1 if the core stream has * responded by not accepting the previous attempted write. 
*/ -int bwsIsEOF (const struct bwriteStream * ws) { - if (NULL == ws || NULL == ws->buff || 0 > ws->minBuffSz || - NULL == ws->writeFn) return BSTR_ERR; - return ws->isEOF; +int bwsIsEOF(const struct bwriteStream * ws) +{ + if (NULL == ws || NULL == ws->buff || 0 > ws->minBuffSz || + NULL == ws->writeFn) return BSTR_ERR; + return ws->isEOF; } /* int bwsBuffLength (struct bwriteStream * ws, int sz) * - * Set the length of the buffer used by the bwsStream. If sz is zero, the + * Set the length of the buffer used by the bwsStream. If sz is zero, the * length is not set. This function returns with the previous length. */ -int bwsBuffLength (struct bwriteStream * ws, int sz) { -int oldSz; - if (ws == NULL || sz < 0) return BSTR_ERR; - oldSz = ws->minBuffSz; - if (sz > 0) ws->minBuffSz = sz; - return oldSz; +int bwsBuffLength(struct bwriteStream * ws, int sz) +{ + int oldSz; + if (ws == NULL || sz < 0) return BSTR_ERR; + oldSz = ws->minBuffSz; + if (sz > 0) ws->minBuffSz = sz; + return oldSz; } /* void * bwsClose (struct bwriteStream * s) * - * Close the bwriteStream, and return the handle to the stream that was + * Close the bwriteStream, and return the handle to the stream that was * originally used to open the given stream. Note that even if the stream * is at EOF it still needs to be closed with a call to bwsClose. 
*/ -void * bwsClose (struct bwriteStream * ws) { -void * parm; - if (NULL == ws || NULL == ws->buff || 0 >= ws->minBuffSz || - NULL == ws->writeFn) return NULL; - bwsWriteFlush (ws); - parm = ws->parm; - ws->parm = NULL; - ws->minBuffSz = -1; - ws->writeFn = NULL; - bstrFree (ws->buff); - free (ws); - return parm; +void * bwsClose(struct bwriteStream * ws) +{ + void * parm; + if (NULL == ws || NULL == ws->buff || 0 >= ws->minBuffSz || + NULL == ws->writeFn) return NULL; + bwsWriteFlush(ws); + parm = ws->parm; + ws->parm = NULL; + ws->minBuffSz = -1; + ws->writeFn = NULL; + bstrFree(ws->buff); + free(ws); + return parm; } - diff --git a/src/cbstring/bstraux.h b/src/cbstring/bstraux.h index 17d4ea7..e31929f 100644 --- a/src/cbstring/bstraux.h +++ b/src/cbstring/bstraux.h @@ -1,7 +1,7 @@ /* * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation * for details on usage and license. */ @@ -9,7 +9,7 @@ * bstraux.h * * This file is not a necessary part of the core bstring library itself, but - * is just an auxilliary module which includes miscellaneous or trivial + * is just an auxilliary module which includes miscellaneous or trivial * functions. 
*/ @@ -24,86 +24,86 @@ extern "C" { #endif /* Safety mechanisms */ -#define bstrDeclare(b) bstring (b) = NULL; +#define bstrDeclare(b) bstring (b) = NULL; #define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }} /* Backward compatibilty with previous versions of Bstrlib */ -#define bAssign(a,b) ((bassign)((a), (b))) -#define bSubs(b,pos,len,a,c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c))) -#define bStrchr(b,c) ((bstrchr)((b), (c))) -#define bStrchrFast(b,c) ((bstrchr)((b), (c))) -#define bCatCstr(b,s) ((bcatcstr)((b), (s))) -#define bCatBlk(b,s,len) ((bcatblk)((b),(s),(len))) -#define bCatStatic(b,s) bCatBlk ((b), ("" s ""), sizeof (s) - 1) -#define bTrunc(b,n) ((btrunc)((b), (n))) -#define bReplaceAll(b,find,repl,pos) ((bfindreplace)((b),(find),(repl),(pos))) +#define bAssign(a, b) ((bassign)((a), (b))) +#define bSubs(b, pos, len, a, c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c))) +#define bStrchr(b, c) ((bstrchr)((b), (c))) +#define bStrchrFast(b, c) ((bstrchr)((b), (c))) +#define bCatCstr(b, s) ((bcatcstr)((b), (s))) +#define bCatBlk(b, s, len) ((bcatblk)((b),(s),(len))) +#define bCatStatic(b, s) bCatBlk ((b), ("" s ""), sizeof (s) - 1) +#define bTrunc(b, n) ((btrunc)((b), (n))) +#define bReplaceAll(b, find, repl, pos) ((bfindreplace)((b),(find),(repl),(pos))) #define bUppercase(b) ((btoupper)(b)) #define bLowercase(b) ((btolower)(b)) -#define bCaselessCmp(a,b) ((bstricmp)((a), (b))) -#define bCaselessNCmp(a,b,n) ((bstrnicmp)((a), (b), (n))) +#define bCaselessCmp(a, b) ((bstricmp)((a), (b))) +#define bCaselessNCmp(a, b, n) ((bstrnicmp)((a), (b), (n))) #define bBase64Decode(b) (bBase64DecodeEx ((b), NULL)) #define bUuDecode(b) (bUuDecodeEx ((b), NULL)) /* Unusual functions */ -extern struct bStream * bsFromBstr (const_bstring b); -extern bstring bTail (bstring b, int n); -extern bstring bHead (bstring b, int n); -extern int bSetCstrChar (bstring a, int pos, char c); -extern int bSetChar 
(bstring b, int pos, char c); -extern int bFill (bstring a, char c, int len); -extern int bReplicate (bstring b, int n); -extern int bReverse (bstring b); -extern int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill); -extern bstring bStrfTime (const char * fmt, const struct tm * timeptr); +extern struct bStream * bsFromBstr(const_bstring b); +extern bstring bTail(bstring b, int n); +extern bstring bHead(bstring b, int n); +extern int bSetCstrChar(bstring a, int pos, char c); +extern int bSetChar(bstring b, int pos, char c); +extern int bFill(bstring a, char c, int len); +extern int bReplicate(bstring b, int n); +extern int bReverse(bstring b); +extern int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill); +extern bstring bStrfTime(const char * fmt, const struct tm * timeptr); #define bAscTime(t) (bStrfTime ("%c\n", (t))) #define bCTime(t) ((t) ? bAscTime (localtime (t)) : NULL) /* Spacing formatting */ -extern int bJustifyLeft (bstring b, int space); -extern int bJustifyRight (bstring b, int width, int space); -extern int bJustifyMargin (bstring b, int width, int space); -extern int bJustifyCenter (bstring b, int width, int space); +extern int bJustifyLeft(bstring b, int space); +extern int bJustifyRight(bstring b, int width, int space); +extern int bJustifyMargin(bstring b, int width, int space); +extern int bJustifyCenter(bstring b, int width, int space); /* Esoteric standards specific functions */ -extern char * bStr2NetStr (const_bstring b); -extern bstring bNetStr2Bstr (const char * buf); -extern bstring bBase64Encode (const_bstring b); -extern bstring bBase64DecodeEx (const_bstring b, int * boolTruncError); -extern struct bStream * bsUuDecode (struct bStream * sInp, int * badlines); -extern bstring bUuDecodeEx (const_bstring src, int * badlines); -extern bstring bUuEncode (const_bstring src); -extern bstring bYEncode (const_bstring src); -extern bstring bYDecode (const_bstring src); +extern char * 
bStr2NetStr(const_bstring b); +extern bstring bNetStr2Bstr(const char * buf); +extern bstring bBase64Encode(const_bstring b); +extern bstring bBase64DecodeEx(const_bstring b, int * boolTruncError); +extern struct bStream * bsUuDecode(struct bStream * sInp, int * badlines); +extern bstring bUuDecodeEx(const_bstring src, int * badlines); +extern bstring bUuEncode(const_bstring src); +extern bstring bYEncode(const_bstring src); +extern bstring bYDecode(const_bstring src); /* Writable stream */ typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm); -struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm); -int bwsWriteBstr (struct bwriteStream * stream, const_bstring b); -int bwsWriteBlk (struct bwriteStream * stream, void * blk, int len); -int bwsWriteFlush (struct bwriteStream * stream); -int bwsIsEOF (const struct bwriteStream * stream); -int bwsBuffLength (struct bwriteStream * stream, int sz); -void * bwsClose (struct bwriteStream * stream); +struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm); +int bwsWriteBstr(struct bwriteStream * stream, const_bstring b); +int bwsWriteBlk(struct bwriteStream * stream, void * blk, int len); +int bwsWriteFlush(struct bwriteStream * stream); +int bwsIsEOF(const struct bwriteStream * stream); +int bwsBuffLength(struct bwriteStream * stream, int sz); +void * bwsClose(struct bwriteStream * stream); /* Security functions */ -#define bSecureDestroy(b) { \ -bstring bstr__tmp = (b); \ - if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \ - (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \ - bdestroy (bstr__tmp); \ - } \ +#define bSecureDestroy(b) { \ +bstring bstr__tmp = (b); \ + if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \ + (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \ + bdestroy (bstr__tmp); \ + } \ } -#define bSecureWriteProtect(t) { \ - if ((t).mlen >= 0) { \ - if ((t).mlen > (t).slen)) { \ - (void) memset ((t).data + (t).slen, 0, 
(size_t) (t).mlen - (t).slen); \ - } \ - (t).mlen = -1; \ - } \ +#define bSecureWriteProtect(t) { \ + if ((t).mlen >= 0) { \ + if ((t).mlen > (t).slen)) { \ + (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \ + } \ + (t).mlen = -1; \ + } \ } -extern bstring bSecureInput (int maxlen, int termchar, - bNgetc vgetchar, void * vgcCtx); +extern bstring bSecureInput(int maxlen, int termchar, + bNgetc vgetchar, void * vgcCtx); #ifdef __cplusplus } diff --git a/src/cbstring/bstrlib.c b/src/cbstring/bstrlib.c index acc6b1a..e1a8590 100644 --- a/src/cbstring/bstrlib.c +++ b/src/cbstring/bstrlib.c @@ -1,7 +1,7 @@ /* * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation * for details on usage and license. */ @@ -34,117 +34,127 @@ #endif #ifndef bstr__realloc -#define bstr__realloc(p,x) realloc ((p), (x)) +#define bstr__realloc(p, x) realloc ((p), (x)) #endif #ifndef bstr__memcpy -#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l)) +#define bstr__memcpy(d, s, l) memcpy ((d), (s), (l)) #endif #ifndef bstr__memmove -#define bstr__memmove(d,s,l) memmove ((d), (s), (l)) +#define bstr__memmove(d, s, l) memmove ((d), (s), (l)) #endif #ifndef bstr__memset -#define bstr__memset(d,c,l) memset ((d), (c), (l)) +#define bstr__memset(d, c, l) memset ((d), (c), (l)) #endif #ifndef bstr__memcmp -#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l)) +#define bstr__memcmp(d, c, l) memcmp ((d), (c), (l)) #endif #ifndef bstr__memchr -#define bstr__memchr(s,c,l) memchr ((s), (c), (l)) +#define bstr__memchr(s, c, l) memchr ((s), (c), (l)) #endif /* Just a length safe wrapper for memmove. 
*/ -#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } +#define bBlockCopy(D, S, L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } /* Compute the snapped size for a given requested size. By snapping to powers of 2 like this, repeated reallocations are avoided. */ -static int snapUpSize (int i) { - if (i < 8) { - i = 8; - } else { - unsigned int j; - j = (unsigned int) i; - - j |= (j >> 1); - j |= (j >> 2); - j |= (j >> 4); - j |= (j >> 8); /* Ok, since int >= 16 bits */ +static int snapUpSize(int i) +{ + if (i < 8) + { + i = 8; + } + else + { + unsigned int j; + j = (unsigned int)i; + + j |= (j >> 1); + j |= (j >> 2); + j |= (j >> 4); + j |= (j >> 8); /* Ok, since int >= 16 bits */ #if (UINT_MAX != 0xffff) - j |= (j >> 16); /* For 32 bit int systems */ + j |= (j >> 16); /* For 32 bit int systems */ #if (UINT_MAX > 0xffffffffUL) - j |= (j >> 32); /* For 64 bit int systems */ + j |= (j >> 32); /* For 64 bit int systems */ #endif #endif - /* Least power of two greater than i */ - j++; - if ((int) j >= i) i = (int) j; - } - return i; + /* Least power of two greater than i */ + j++; + if ((int)j >= i) i = (int)j; + } + return i; } /* int balloc (bstring b, int len) * * Increase the size of the memory backing the bstring b to at least len. 
*/ -int balloc (bstring b, int olen) { - int len; - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || - b->mlen < b->slen || olen <= 0) { - return BSTR_ERR; - } - - if (olen >= b->mlen) { - unsigned char * x; - - if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK; - - /* Assume probability of a non-moving realloc is 0.125 */ - if (7 * b->mlen < 8 * b->slen) { - - /* If slen is close to mlen in size then use realloc to reduce - the memory defragmentation */ - - reallocStrategy:; - - x = (unsigned char *) bstr__realloc (b->data, (size_t) len); - if (x == NULL) { - - /* Since we failed, try allocating the tighest possible - allocation */ - - if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) { - return BSTR_ERR; - } - } - } else { - - /* If slen is not close to mlen then avoid the penalty of copying - the extra bytes that are allocated, but not considered part of - the string */ - - if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) { - - /* Perhaps there is no available memory for the two - allocations to be in memory at once */ - - goto reallocStrategy; - - } else { - if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen); - bstr__free (b->data); - } - } - b->data = x; - b->mlen = len; - b->data[b->slen] = (unsigned char) '\0'; - } - - return BSTR_OK; +int balloc(bstring b, int olen) +{ + int len; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || + b->mlen < b->slen || olen <= 0) + { + return BSTR_ERR; + } + + if (olen >= b->mlen) + { + unsigned char * x; + + if ((len = snapUpSize(olen)) <= b->mlen) return BSTR_OK; + + /* Assume probability of a non-moving realloc is 0.125 */ + if (7 * b->mlen < 8 * b->slen) + { + /* If slen is close to mlen in size then use realloc to reduce + the memory defragmentation */ + + reallocStrategy:; + + x = (unsigned char *)bstr__realloc(b->data, (size_t)len); + if (x == NULL) + { + /* Since we failed, try allocating the tighest 
possible + allocation */ + + if (NULL == (x = (unsigned char *)bstr__realloc(b->data, (size_t)(len = olen)))) + { + return BSTR_ERR; + } + } + } + else + { + /* If slen is not close to mlen then avoid the penalty of copying + the extra bytes that are allocated, but not considered part of + the string */ + + if (NULL == (x = (unsigned char *)bstr__alloc((size_t)len))) + { + /* Perhaps there is no available memory for the two + allocations to be in memory at once */ + + goto reallocStrategy; + } + else + { + if (b->slen) bstr__memcpy((char *)x, (char *)b->data, (size_t)b->slen); + bstr__free(b->data); + } + } + b->data = x; + b->mlen = len; + b->data[b->slen] = (unsigned char)'\0'; + } + + return BSTR_OK; } /* int ballocmin (bstring b, int len) @@ -153,25 +163,28 @@ int balloc (bstring b, int olen) { * whichever is larger. Note that repeated use of this function can degrade * performance. */ -int ballocmin (bstring b, int len) { - unsigned char * s; +int ballocmin(bstring b, int len) +{ + unsigned char * s; - if (b == NULL || b->data == NULL || (b->slen+1) < 0 || b->mlen <= 0 || - b->mlen < b->slen || len <= 0) { - return BSTR_ERR; - } + if (b == NULL || b->data == NULL || (b->slen + 1) < 0 || b->mlen <= 0 || + b->mlen < b->slen || len <= 0) + { + return BSTR_ERR; + } - if (len < b->slen + 1) len = b->slen + 1; + if (len < b->slen + 1) len = b->slen + 1; - if (len != b->mlen) { - s = (unsigned char *) bstr__realloc (b->data, (size_t) len); - if (NULL == s) return BSTR_ERR; - s[b->slen] = (unsigned char) '\0'; - b->data = s; - b->mlen = len; - } + if (len != b->mlen) + { + s = (unsigned char *)bstr__realloc(b->data, (size_t)len); + if (NULL == s) return BSTR_ERR; + s[b->slen] = (unsigned char)'\0'; + b->data = s; + b->mlen = len; + } - return BSTR_OK; + return BSTR_OK; } /* bstring bfromcstr (const char * str) @@ -179,376 +192,414 @@ int ballocmin (bstring b, int len) { * Create a bstring which contains the contents of the '\0' terminated char * * buffer str. 
*/ -bstring bfromcstr (const char * str) { -bstring b; -int i; -size_t j; +bstring bfromcstr(const char * str) +{ + bstring b; + int i; + size_t j; - if (str == NULL) return NULL; - j = (strlen) (str); - i = snapUpSize ((int) (j + (2 - (j != 0)))); - if (i <= (int) j) return NULL; + if (str == NULL) return NULL; + j = (strlen)(str); + i = snapUpSize((int)(j + (2 - (j != 0)))); + if (i <= (int)j) return NULL; - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (NULL == b) return NULL; - b->slen = (int) j; - if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { - bstr__free (b); - return NULL; - } + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (NULL == b) return NULL; + b->slen = (int)j; + if (NULL == (b->data = (unsigned char *)bstr__alloc(b->mlen = i))) + { + bstr__free(b); + return NULL; + } - bstr__memcpy (b->data, str, j+1); - return b; + bstr__memcpy(b->data, str, j + 1); + return b; } /* bstring bfromcstralloc (int mlen, const char * str) * * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. The memory buffer backing the string is at least len + * buffer str. The memory buffer backing the string is at least len * characters in length. 
*/ -bstring bfromcstralloc (int mlen, const char * str) { -bstring b; -int i; -size_t j; +bstring bfromcstralloc(int mlen, const char * str) +{ + bstring b; + int i; + size_t j; - if (str == NULL) return NULL; - j = (strlen) (str); - i = snapUpSize ((int) (j + (2 - (j != 0)))); - if (i <= (int) j) return NULL; + if (str == NULL) return NULL; + j = (strlen)(str); + i = snapUpSize((int)(j + (2 - (j != 0)))); + if (i <= (int)j) return NULL; - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b == NULL) return NULL; - b->slen = (int) j; - if (i < mlen) i = mlen; + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (b == NULL) return NULL; + b->slen = (int)j; + if (i < mlen) i = mlen; - if (NULL == (b->data = (unsigned char *) bstr__alloc (b->mlen = i))) { - bstr__free (b); - return NULL; - } + if (NULL == (b->data = (unsigned char *)bstr__alloc(b->mlen = i))) + { + bstr__free(b); + return NULL; + } - bstr__memcpy (b->data, str, j+1); - return b; + bstr__memcpy(b->data, str, j + 1); + return b; } /* bstring blk2bstr (const void * blk, int len) * - * Create a bstring which contains the content of the block blk of length + * Create a bstring which contains the content of the block blk of length * len. 
*/ -bstring blk2bstr (const void * blk, int len) { -bstring b; -int i; +bstring blk2bstr(const void * blk, int len) +{ + bstring b; + int i; - if (blk == NULL || len < 0) return NULL; - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b == NULL) return NULL; - b->slen = len; + if (blk == NULL || len < 0) return NULL; + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (b == NULL) return NULL; + b->slen = len; - i = len + (2 - (len != 0)); - i = snapUpSize (i); + i = len + (2 - (len != 0)); + i = snapUpSize(i); - b->mlen = i; + b->mlen = i; - b->data = (unsigned char *) bstr__alloc ((size_t) b->mlen); - if (b->data == NULL) { - bstr__free (b); - return NULL; - } + b->data = (unsigned char *)bstr__alloc((size_t)b->mlen); + if (b->data == NULL) + { + bstr__free(b); + return NULL; + } - if (len > 0) bstr__memcpy (b->data, blk, (size_t) len); - b->data[len] = (unsigned char) '\0'; + if (len > 0) bstr__memcpy(b->data, blk, (size_t)len); + b->data[len] = (unsigned char)'\0'; - return b; + return b; } /* char * bstr2cstr (const_bstring s, char z) * - * Create a '\0' terminated char * buffer which is equal to the contents of - * the bstring s, except that any contained '\0' characters are converted - * to the character in z. This returned value should be freed with a + * Create a '\0' terminated char * buffer which is equal to the contents of + * the bstring s, except that any contained '\0' characters are converted + * to the character in z. This returned value should be freed with a * bcstrfree () call, by the calling application. 
*/ -char * bstr2cstr (const_bstring b, char z) { -int i, l; -char * r; +char * bstr2cstr(const_bstring b, char z) +{ + int i, l; + char * r; - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - l = b->slen; - r = (char *) bstr__alloc ((size_t) (l + 1)); - if (r == NULL) return r; + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + l = b->slen; + r = (char *)bstr__alloc((size_t)(l + 1)); + if (r == NULL) return r; - for (i=0; i < l; i ++) { - r[i] = (char) ((b->data[i] == '\0') ? z : (char) (b->data[i])); - } + for (i = 0; i < l; i++) + { + r[i] = (char)((b->data[i] == '\0') ? z : (char)(b->data[i])); + } - r[l] = (unsigned char) '\0'; + r[l] = (unsigned char)'\0'; - return r; + return r; } /* int bcstrfree (char * s) * * Frees a C-string generated by bstr2cstr (). This is normally unnecessary - * since it just wraps a call to bstr__free (), however, if bstr__alloc () - * and bstr__free () have been redefined as a macros within the bstrlib - * module (via defining them in memdbg.h after defining - * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std - * library functions, then this allows a correct way of freeing the memory - * that allows higher level code to be independent from these macro + * since it just wraps a call to bstr__free (), however, if bstr__alloc () + * and bstr__free () have been redefined as a macros within the bstrlib + * module (via defining them in memdbg.h after defining + * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std + * library functions, then this allows a correct way of freeing the memory + * that allows higher level code to be independent from these macro * redefinitions. */ -int bcstrfree (char * s) { - if (s) { - bstr__free (s); - return BSTR_OK; - } - return BSTR_ERR; +int bcstrfree(char * s) +{ + if (s) + { + bstr__free(s); + return BSTR_OK; + } + return BSTR_ERR; } /* int bconcat (bstring b0, const_bstring b1) * * Concatenate the bstring b1 to the bstring b0. 
*/ -int bconcat (bstring b0, const_bstring b1) { -int len, d; -bstring aux = (bstring) b1; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; - - d = b0->slen; - len = b1->slen; - if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; - - if (b0->mlen <= d + len + 1) { - ptrdiff_t pd = b1->data - b0->data; - if (0 <= pd && pd < b0->mlen) { - if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; - } - if (balloc (b0, d + len + 1) != BSTR_OK) { - if (aux != b1) bdestroy (aux); - return BSTR_ERR; - } - } - - bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len); - b0->data[d + len] = (unsigned char) '\0'; - b0->slen = d + len; - if (aux != b1) bdestroy (aux); - return BSTR_OK; +int bconcat(bstring b0, const_bstring b1) +{ + int len, d; + bstring aux = (bstring)b1; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; + + d = b0->slen; + len = b1->slen; + if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; + + if (b0->mlen <= d + len + 1) + { + ptrdiff_t pd = b1->data - b0->data; + if (0 <= pd && pd < b0->mlen) + { + if (NULL == (aux = bstrcpy(b1))) return BSTR_ERR; + } + if (balloc(b0, d + len + 1) != BSTR_OK) + { + if (aux != b1) bdestroy(aux); + return BSTR_ERR; + } + } + + bBlockCopy(&b0->data[d], &aux->data[0], (size_t)len); + b0->data[d + len] = (unsigned char)'\0'; + b0->slen = d + len; + if (aux != b1) bdestroy(aux); + return BSTR_OK; } /* int bconchar (bstring b, char c) / * * Concatenate the single character c to the bstring b. 
*/ -int bconchar (bstring b, char c) { -int d; +int bconchar(bstring b, char c) +{ + int d; - if (b == NULL) return BSTR_ERR; - d = b->slen; - if ((d | (b->mlen - d)) < 0 || balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - b->data[d] = (unsigned char) c; - b->data[d + 1] = (unsigned char) '\0'; - b->slen++; - return BSTR_OK; + if (b == NULL) return BSTR_ERR; + d = b->slen; + if ((d | (b->mlen - d)) < 0 || balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; + b->data[d] = (unsigned char)c; + b->data[d + 1] = (unsigned char)'\0'; + b->slen++; + return BSTR_OK; } /* int bcatcstr (bstring b, const char * s) * * Concatenate a char * string to a bstring. */ -int bcatcstr (bstring b, const char * s) { -char * d; -int i, l; +int bcatcstr(bstring b, const char * s) +{ + char * d; + int i, l; - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL) return BSTR_ERR; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL) return BSTR_ERR; - /* Optimistically concatenate directly */ - l = b->mlen - b->slen; - d = (char *) &b->data[b->slen]; - for (i=0; i < l; i++) { - if ((*d++ = *s++) == '\0') { - b->slen += i; - return BSTR_OK; - } - } - b->slen += i; + /* Optimistically concatenate directly */ + l = b->mlen - b->slen; + d = (char *)&b->data[b->slen]; + for (i = 0; i < l; i++) + { + if ((*d++ = *s++) == '\0') + { + b->slen += i; + return BSTR_OK; + } + } + b->slen += i; - /* Need to explicitely resize and concatenate tail */ - return bcatblk (b, (const void *) s, (int) strlen (s)); + /* Need to explicitely resize and concatenate tail */ + return bcatblk(b, (const void *)s, (int)strlen(s)); } /* int bcatblk (bstring b, const void * s, int len) * * Concatenate a fixed length buffer to a bstring. 
*/ -int bcatblk (bstring b, const void * s, int len) { -int nl; +int bcatblk(bstring b, const void * s, int len) +{ + int nl; - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; - if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ - if (b->mlen <= nl && 0 > balloc (b, nl + 1)) return BSTR_ERR; + if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ + if (b->mlen <= nl && 0 > balloc(b, nl + 1)) return BSTR_ERR; - bBlockCopy (&b->data[b->slen], s, (size_t) len); - b->slen = nl; - b->data[nl] = (unsigned char) '\0'; - return BSTR_OK; + bBlockCopy(&b->data[b->slen], s, (size_t)len); + b->slen = nl; + b->data[nl] = (unsigned char)'\0'; + return BSTR_OK; } /* bstring bstrcpy (const_bstring b) * * Create a copy of the bstring b. */ -bstring bstrcpy (const_bstring b) { -bstring b0; -int i,j; +bstring bstrcpy(const_bstring b) +{ + bstring b0; + int i, j; - /* Attempted to copy an invalid string? */ - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + /* Attempted to copy an invalid string? 
*/ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - b0 = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (b0 == NULL) { - /* Unable to allocate memory for string header */ - return NULL; - } + b0 = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (b0 == NULL) + { + /* Unable to allocate memory for string header */ + return NULL; + } - i = b->slen; - j = snapUpSize (i + 1); + i = b->slen; + j = snapUpSize(i + 1); - b0->data = (unsigned char *) bstr__alloc (j); - if (b0->data == NULL) { - j = i + 1; - b0->data = (unsigned char *) bstr__alloc (j); - if (b0->data == NULL) { - /* Unable to allocate memory for string data */ - bstr__free (b0); - return NULL; - } - } + b0->data = (unsigned char *)bstr__alloc(j); + if (b0->data == NULL) + { + j = i + 1; + b0->data = (unsigned char *)bstr__alloc(j); + if (b0->data == NULL) + { + /* Unable to allocate memory for string data */ + bstr__free(b0); + return NULL; + } + } - b0->mlen = j; - b0->slen = i; + b0->mlen = j; + b0->slen = i; - if (i) bstr__memcpy ((char *) b0->data, (char *) b->data, i); - b0->data[b0->slen] = (unsigned char) '\0'; + if (i) bstr__memcpy((char *)b0->data, (char *)b->data, i); + b0->data[b0->slen] = (unsigned char)'\0'; - return b0; + return b0; } /* int bassign (bstring a, const_bstring b) * * Overwrite the string a with the contents of string b. 
*/ -int bassign (bstring a, const_bstring b) { - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - if (b->slen != 0) { - if (balloc (a, b->slen) != BSTR_OK) return BSTR_ERR; - bstr__memmove (a->data, b->data, b->slen); - } else { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - } - a->data[b->slen] = (unsigned char) '\0'; - a->slen = b->slen; - return BSTR_OK; +int bassign(bstring a, const_bstring b) +{ + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + if (b->slen != 0) + { + if (balloc(a, b->slen) != BSTR_OK) return BSTR_ERR; + bstr__memmove(a->data, b->data, b->slen); + } + else + { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + } + a->data[b->slen] = (unsigned char)'\0'; + a->slen = b->slen; + return BSTR_OK; } /* int bassignmidstr (bstring a, const_bstring b, int left, int len) * - * Overwrite the string a with the middle of contents of string b - * starting from position left and running for a length len. left and + * Overwrite the string a with the middle of contents of string b + * starting from position left and running for a length len. left and * len are clamped to the ends of b as with the function bmidstr. 
*/ -int bassignmidstr (bstring a, const_bstring b, int left, int len) { - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - - if (left < 0) { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - - if (len > 0) { - if (balloc (a, len) != BSTR_OK) return BSTR_ERR; - bstr__memmove (a->data, b->data + left, len); - a->slen = len; - } else { - a->slen = 0; - } - a->data[a->slen] = (unsigned char) '\0'; - return BSTR_OK; +int bassignmidstr(bstring a, const_bstring b, int left, int len) +{ + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + + if (left < 0) + { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + + if (len > 0) + { + if (balloc(a, len) != BSTR_OK) return BSTR_ERR; + bstr__memmove(a->data, b->data + left, len); + a->slen = len; + } + else + { + a->slen = 0; + } + a->data[a->slen] = (unsigned char)'\0'; + return BSTR_OK; } /* int bassigncstr (bstring a, const char * str) * - * Overwrite the string a with the contents of char * string str. Note that - * the bstring a must be a well defined and writable bstring. If an error + * Overwrite the string a with the contents of char * string str. Note that + * the bstring a must be a well defined and writable bstring. If an error * occurs BSTR_ERR is returned however a may be partially overwritten. 
*/ -int bassigncstr (bstring a, const char * str) { -int i; -size_t len; - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == str) - return BSTR_ERR; - - for (i=0; i < a->mlen; i++) { - if ('\0' == (a->data[i] = str[i])) { - a->slen = i; - return BSTR_OK; - } - } - - a->slen = i; - len = strlen (str + i); - if (len > INT_MAX || i + len + 1 > INT_MAX || - 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR; - bBlockCopy (a->data + i, str + i, (size_t) len + 1); - a->slen += (int) len; - return BSTR_OK; +int bassigncstr(bstring a, const char * str) +{ + int i; + size_t len; + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == str) + return BSTR_ERR; + + for (i = 0; i < a->mlen; i++) + { + if ('\0' == (a->data[i] = str[i])) + { + a->slen = i; + return BSTR_OK; + } + } + + a->slen = i; + len = strlen(str + i); + if (len > INT_MAX || i + len + 1 > INT_MAX || + 0 > balloc(a, (int)(i + len + 1))) return BSTR_ERR; + bBlockCopy(a->data + i, str + i, (size_t)len + 1); + a->slen += (int)len; + return BSTR_OK; } /* int bassignblk (bstring a, const void * s, int len) * - * Overwrite the string a with the contents of the block (s, len). Note that - * the bstring a must be a well defined and writable bstring. If an error + * Overwrite the string a with the contents of the block (s, len). Note that + * the bstring a must be a well defined and writable bstring. If an error * occurs BSTR_ERR is returned and a is not overwritten. 
*/ -int bassignblk (bstring a, const void * s, int len) { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) - return BSTR_ERR; - if (len + 1 > a->mlen && 0 > balloc (a, len + 1)) return BSTR_ERR; - bBlockCopy (a->data, s, (size_t) len); - a->data[len] = (unsigned char) '\0'; - a->slen = len; - return BSTR_OK; +int bassignblk(bstring a, const void * s, int len) +{ + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) + return BSTR_ERR; + if (len + 1 > a->mlen && 0 > balloc(a, len + 1)) return BSTR_ERR; + bBlockCopy(a->data, s, (size_t)len); + a->data[len] = (unsigned char)'\0'; + a->slen = len; + return BSTR_OK; } /* int btrunc (bstring b, int n) * * Truncate the bstring to at most n characters. */ -int btrunc (bstring b, int n) { - if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - if (b->slen > n) { - b->slen = n; - b->data[n] = (unsigned char) '\0'; - } - return BSTR_OK; +int btrunc(bstring b, int n) +{ + if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b->slen > n) + { + b->slen = n; + b->data[n] = (unsigned char)'\0'; + } + return BSTR_OK; } #define upcase(c) (toupper ((unsigned char) c)) @@ -559,64 +610,72 @@ int btrunc (bstring b, int n) { * * Convert contents of bstring to upper case. 
*/ -int btoupper (bstring b) { -int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i=0, len = b->slen; i < len; i++) { - b->data[i] = (unsigned char) upcase (b->data[i]); - } - return BSTR_OK; +int btoupper(bstring b) +{ + int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i = 0, len = b->slen; i < len; i++) + { + b->data[i] = (unsigned char)upcase(b->data[i]); + } + return BSTR_OK; } /* int btolower (bstring b) * * Convert contents of bstring to lower case. */ -int btolower (bstring b) { -int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i=0, len = b->slen; i < len; i++) { - b->data[i] = (unsigned char) downcase (b->data[i]); - } - return BSTR_OK; +int btolower(bstring b) +{ + int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i = 0, len = b->slen; i < len; i++) + { + b->data[i] = (unsigned char)downcase(b->data[i]); + } + return BSTR_OK; } /* int bstricmp (const_bstring b0, const_bstring b1) * - * Compare two strings without differentiating between case. The return - * value is the difference of the values of the characters where the two - * strings first differ after lower case transformation, otherwise 0 is - * returned indicating that the strings are equal. If the lengths are - * different, then a difference from 0 is given, but if the first extra + * Compare two strings without differentiating between case. The return + * value is the difference of the values of the characters where the two + * strings first differ after lower case transformation, otherwise 0 is + * returned indicating that the strings are equal. 
If the lengths are + * different, then a difference from 0 is given, but if the first extra * character is '\0', then it is taken to be the value UCHAR_MAX+1. */ -int bstricmp (const_bstring b0, const_bstring b1) { -int i, v, n; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN; - if ((n = b0->slen) > b1->slen) n = b1->slen; - else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; - - for (i = 0; i < n; i ++) { - v = (char) downcase (b0->data[i]) - - (char) downcase (b1->data[i]); - if (0 != v) return v; - } - - if (b0->slen > n) { - v = (char) downcase (b0->data[n]); - if (v) return v; - return UCHAR_MAX + 1; - } - if (b1->slen > n) { - v = - (char) downcase (b1->data[n]); - if (v) return v; - return - (int) (UCHAR_MAX + 1); - } - return BSTR_OK; +int bstricmp(const_bstring b0, const_bstring b1) +{ + int i, v, n; + + if (bdata(b0) == NULL || b0->slen < 0 || + bdata(b1) == NULL || b1->slen < 0) return SHRT_MIN; + if ((n = b0->slen) > b1->slen) n = b1->slen; + else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; + + for (i = 0; i < n; i++) + { + v = (char)downcase(b0->data[i]) + - (char)downcase(b1->data[i]); + if (0 != v) return v; + } + + if (b0->slen > n) + { + v = (char)downcase(b0->data[n]); + if (v) return v; + return UCHAR_MAX + 1; + } + if (b1->slen > n) + { + v = -(char)downcase(b1->data[n]); + if (v) return v; + return -(int)(UCHAR_MAX + 1); + } + return BSTR_OK; } /* int bstrnicmp (const_bstring b0, const_bstring b1, int n) @@ -625,88 +684,98 @@ int i, v, n; * characters. If the position where the two strings first differ is * before the nth position, the return value is the difference of the values * of the characters, otherwise 0 is returned. 
If the lengths are different - * and less than n characters, then a difference from 0 is given, but if the - * first extra character is '\0', then it is taken to be the value + * and less than n characters, then a difference from 0 is given, but if the + * first extra character is '\0', then it is taken to be the value * UCHAR_MAX+1. */ -int bstrnicmp (const_bstring b0, const_bstring b1, int n) { -int i, v, m; +int bstrnicmp(const_bstring b0, const_bstring b1, int n) +{ + int i, v, m; - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; + if (bdata(b0) == NULL || b0->slen < 0 || + bdata(b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; - if (b0->data != b1->data) { - for (i = 0; i < m; i ++) { - v = (char) downcase (b0->data[i]); - v -= (char) downcase (b1->data[i]); - if (v != 0) return b0->data[i] - b1->data[i]; - } - } + if (b0->data != b1->data) + { + for (i = 0; i < m; i++) + { + v = (char)downcase(b0->data[i]); + v -= (char)downcase(b1->data[i]); + if (v != 0) return b0->data[i] - b1->data[i]; + } + } - if (n == m || b0->slen == b1->slen) return BSTR_OK; + if (n == m || b0->slen == b1->slen) return BSTR_OK; - if (b0->slen > m) { - v = (char) downcase (b0->data[m]); - if (v) return v; - return UCHAR_MAX + 1; - } + if (b0->slen > m) + { + v = (char)downcase(b0->data[m]); + if (v) return v; + return UCHAR_MAX + 1; + } - v = - (char) downcase (b1->data[m]); - if (v) return v; - return - (int) (UCHAR_MAX + 1); + v = -(char)downcase(b1->data[m]); + if (v) return v; + return -(int)(UCHAR_MAX + 1); } /* int biseqcaseless (const_bstring b0, const_bstring b1) * - * Compare two strings for equality without differentiating between case. 
- * If the strings differ other than in case, 0 is returned, if the strings - * are the same, 1 is returned, if there is an error, -1 is returned. If - * the length of the strings are different, this function is O(1). '\0' + * Compare two strings for equality without differentiating between case. + * If the strings differ other than in case, 0 is returned, if the strings + * are the same, 1 is returned, if there is an error, -1 is returned. If + * the length of the strings are different, this function is O(1). '\0' * termination characters are not treated in any special way. */ -int biseqcaseless (const_bstring b0, const_bstring b1) { -int i, n; - - if (bdata (b0) == NULL || b0->slen < 0 || - bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - for (i=0, n=b0->slen; i < n; i++) { - if (b0->data[i] != b1->data[i]) { - unsigned char c = (unsigned char) downcase (b0->data[i]); - if (c != (unsigned char) downcase (b1->data[i])) return 0; - } - } - return 1; +int biseqcaseless(const_bstring b0, const_bstring b1) +{ + int i, n; + + if (bdata(b0) == NULL || b0->slen < 0 || + bdata(b1) == NULL || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + for (i = 0, n = b0->slen; i < n; i++) + { + if (b0->data[i] != b1->data[i]) + { + unsigned char c = (unsigned char)downcase(b0->data[i]); + if (c != (unsigned char)downcase(b1->data[i])) return 0; + } + } + return 1; } /* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) * - * Compare beginning of string b0 with a block of memory of length len + * Compare beginning of string b0 with a block of memory of length len * without differentiating between case for equality. 
If the beginning of b0 - * differs from the memory block other than in case (or if b0 is too short), - * 0 is returned, if the strings are the same, 1 is returned, if there is an - * error, -1 is returned. '\0' characters are not treated in any special + * differs from the memory block other than in case (or if b0 is too short), + * 0 is returned, if the strings are the same, 1 is returned, if there is an + * error, -1 is returned. '\0' characters are not treated in any special * way. */ -int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) { -int i; +int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len) +{ + int i; - if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *) blk || len == 0) return 1; + if (bdata(b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *)blk || len == 0) return 1; - for (i = 0; i < len; i ++) { - if (b0->data[i] != ((const unsigned char *) blk)[i]) { - if (downcase (b0->data[i]) != - downcase (((const unsigned char *) blk)[i])) return 0; - } - } - return 1; + for (i = 0; i < len; i++) + { + if (b0->data[i] != ((const unsigned char *)blk)[i]) + { + if (downcase(b0->data[i]) != + downcase(((const unsigned char *)blk)[i])) return 0; + } + } + return 1; } /* @@ -714,21 +783,24 @@ int i; * * Delete whitespace contiguous from the left end of the string. 
*/ -int bltrimws (bstring b) { -int i, len; +int bltrimws(bstring b) +{ + int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (len = b->slen, i = 0; i < len; i++) { - if (!wspace (b->data[i])) { - return bdelete (b, 0, i); - } - } + for (len = b->slen, i = 0; i < len; i++) + { + if (!wspace(b->data[i])) + { + return bdelete(b, 0, i); + } + } - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; + b->data[0] = (unsigned char)'\0'; + b->slen = 0; + return BSTR_OK; } /* @@ -736,23 +808,26 @@ int i, len; * * Delete whitespace contiguous from the right end of the string. */ -int brtrimws (bstring b) { -int i; +int brtrimws(bstring b) +{ + int i; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i = b->slen - 1; i >= 0; i--) { - if (!wspace (b->data[i])) { - if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; - b->slen = i + 1; - return BSTR_OK; - } - } + for (i = b->slen - 1; i >= 0; i--) + { + if (!wspace(b->data[i])) + { + if (b->mlen > i) b->data[i + 1] = (unsigned char)'\0'; + b->slen = i + 1; + return BSTR_OK; + } + } - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; + b->data[0] = (unsigned char)'\0'; + b->slen = 0; + return BSTR_OK; } /* @@ -760,783 +835,865 @@ int i; * * Delete whitespace contiguous from both ends of the string. 
*/ -int btrimws (bstring b) { -int i, j; +int btrimws(bstring b) +{ + int i, j; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i = b->slen - 1; i >= 0; i--) { - if (!wspace (b->data[i])) { - if (b->mlen > i) b->data[i+1] = (unsigned char) '\0'; - b->slen = i + 1; - for (j = 0; wspace (b->data[j]); j++) {} - return bdelete (b, 0, j); - } - } + for (i = b->slen - 1; i >= 0; i--) + { + if (!wspace(b->data[i])) + { + if (b->mlen > i) b->data[i + 1] = (unsigned char)'\0'; + b->slen = i + 1; + for (j = 0; wspace(b->data[j]); j++) + { + } + return bdelete(b, 0, j); + } + } - b->data[0] = (unsigned char) '\0'; - b->slen = 0; - return BSTR_OK; + b->data[0] = (unsigned char)'\0'; + b->slen = 0; + return BSTR_OK; } /* int biseq (const_bstring b0, const_bstring b1) * - * Compare the string b0 and b1. If the strings differ, 0 is returned, if - * the strings are the same, 1 is returned, if there is an error, -1 is + * Compare the string b0 and b1. If the strings differ, 0 is returned, if + * the strings are the same, 1 is returned, if there is an error, -1 is * returned. If the length of the strings are different, this function is * O(1). '\0' termination characters are not treated in any special way. 
*/ -int biseq (const_bstring b0, const_bstring b1) { - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - return !bstr__memcmp (b0->data, b1->data, b0->slen); +int biseq(const_bstring b0, const_bstring b1) +{ + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + return !bstr__memcmp(b0->data, b1->data, b0->slen); } /* int bisstemeqblk (const_bstring b0, const void * blk, int len) * - * Compare beginning of string b0 with a block of memory of length len for - * equality. If the beginning of b0 differs from the memory block (or if b0 - * is too short), 0 is returned, if the strings are the same, 1 is returned, - * if there is an error, -1 is returned. '\0' characters are not treated in + * Compare beginning of string b0 with a block of memory of length len for + * equality. If the beginning of b0 differs from the memory block (or if b0 + * is too short), 0 is returned, if the strings are the same, 1 is returned, + * if there is an error, -1 is returned. '\0' characters are not treated in * any special way. 
*/ -int bisstemeqblk (const_bstring b0, const void * blk, int len) { -int i; +int bisstemeqblk(const_bstring b0, const void * blk, int len) +{ + int i; - if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *) blk || len == 0) return 1; + if (bdata(b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *)blk || len == 0) return 1; - for (i = 0; i < len; i ++) { - if (b0->data[i] != ((const unsigned char *) blk)[i]) return BSTR_OK; - } - return 1; + for (i = 0; i < len; i++) + { + if (b0->data[i] != ((const unsigned char *)blk)[i]) return BSTR_OK; + } + return 1; } /* int biseqcstr (const_bstring b, const char *s) * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical with the bstring b with no '\0' - * characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal when comparing them in the same format after converting one or the - * other. If the strings are equal 1 is returned, if they are unequal 0 is + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical with the bstring b with no '\0' + * characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal when comparing them in the same format after converting one or the + * other. If the strings are equal 1 is returned, if they are unequal 0 is * returned and if there is a detectable error BSTR_ERR is returned. 
*/ -int biseqcstr (const_bstring b, const char * s) { -int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i=0; i < b->slen; i++) { - if (s[i] == '\0' || b->data[i] != (unsigned char) s[i]) return BSTR_OK; - } - return s[i] == '\0'; +int biseqcstr(const_bstring b, const char * s) +{ + int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i = 0; i < b->slen; i++) + { + if (s[i] == '\0' || b->data[i] != (unsigned char)s[i]) return BSTR_OK; + } + return s[i] == '\0'; } /* int biseqcstrcaseless (const_bstring b, const char *s) * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical except for case with the bstring b with - * no '\0' characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal ignoring case when comparing them in the same format after - * converting one or the other. If the strings are equal, except for case, - * 1 is returned, if they are unequal regardless of case 0 is returned and + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical except for case with the bstring b with + * no '\0' characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal ignoring case when comparing them in the same format after + * converting one or the other. If the strings are equal, except for case, + * 1 is returned, if they are unequal regardless of case 0 is returned and * if there is a detectable error BSTR_ERR is returned. 
*/ -int biseqcstrcaseless (const_bstring b, const char * s) { -int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i=0; i < b->slen; i++) { - if (s[i] == '\0' || - (b->data[i] != (unsigned char) s[i] && - downcase (b->data[i]) != (unsigned char) downcase (s[i]))) - return BSTR_OK; - } - return s[i] == '\0'; +int biseqcstrcaseless(const_bstring b, const char * s) +{ + int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i = 0; i < b->slen; i++) + { + if (s[i] == '\0' || + (b->data[i] != (unsigned char)s[i] && + downcase(b->data[i]) != (unsigned char)downcase(s[i]))) + return BSTR_OK; + } + return s[i] == '\0'; } /* int bstrcmp (const_bstring b0, const_bstring b1) * - * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, - * otherwise a value less than or greater than zero, indicating that the - * string pointed to by b0 is lexicographically less than or greater than - * the string pointed to by b1 is returned. If the the string lengths are - * unequal but the characters up until the length of the shorter are equal - * then a value less than, or greater than zero, indicating that the string - * pointed to by b0 is shorter or longer than the string pointed to by b1 is - * returned. 0 is returned if and only if the two strings are the same. If + * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, + * otherwise a value less than or greater than zero, indicating that the + * string pointed to by b0 is lexicographically less than or greater than + * the string pointed to by b1 is returned. If the the string lengths are + * unequal but the characters up until the length of the shorter are equal + * then a value less than, or greater than zero, indicating that the string + * pointed to by b0 is shorter or longer than the string pointed to by b1 is + * returned. 0 is returned if and only if the two strings are the same. 
If * the length of the strings are different, this function is O(n). Like its - * standard C library counter part strcmp, the comparison does not proceed + * standard C library counter part strcmp, the comparison does not proceed * past any '\0' termination characters encountered. */ -int bstrcmp (const_bstring b0, const_bstring b1) { -int i, v, n; +int bstrcmp(const_bstring b0, const_bstring b1) +{ + int i, v, n; - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - n = b0->slen; if (n > b1->slen) n = b1->slen; - if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) - return BSTR_OK; + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + n = b0->slen; if (n > b1->slen) n = b1->slen; + if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) + return BSTR_OK; - for (i = 0; i < n; i ++) { - v = ((char) b0->data[i]) - ((char) b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; - } + for (i = 0; i < n; i++) + { + v = ((char)b0->data[i]) - ((char)b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char)'\0') return BSTR_OK; + } - if (b0->slen > n) return 1; - if (b1->slen > n) return -1; - return BSTR_OK; + if (b0->slen > n) return 1; + if (b1->slen > n) return -1; + return BSTR_OK; } /* int bstrncmp (const_bstring b0, const_bstring b1, int n) * - * Compare the string b0 and b1 for at most n characters. If there is an - * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and + * Compare the string b0 and b1 for at most n characters. If there is an + * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and * b1 were first truncated to at most n characters then bstrcmp was called - * with these new strings are paremeters. If the length of the strings are - * different, this function is O(n). 
Like its standard C library counter - * part strcmp, the comparison does not proceed past any '\0' termination + * with these new strings are paremeters. If the length of the strings are + * different, this function is O(n). Like its standard C library counter + * part strcmp, the comparison does not proceed past any '\0' termination * characters encountered. */ -int bstrncmp (const_bstring b0, const_bstring b1, int n) { -int i, v, m; +int bstrncmp(const_bstring b0, const_bstring b1, int n) +{ + int i, v, m; - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; - if (b0->data != b1->data) { - for (i = 0; i < m; i ++) { - v = ((char) b0->data[i]) - ((char) b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char) '\0') return BSTR_OK; - } - } + if (b0->data != b1->data) + { + for (i = 0; i < m; i++) + { + v = ((char)b0->data[i]) - ((char)b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char)'\0') return BSTR_OK; + } + } - if (n == m || b0->slen == b1->slen) return BSTR_OK; + if (n == m || b0->slen == b1->slen) return BSTR_OK; - if (b0->slen > m) return 1; - return -1; + if (b0->slen > m) return 1; + return -1; } /* bstring bmidstr (const_bstring b, int left, int len) * * Create a bstring which is the substring of b starting from position left * and running for a length len (clamped by the end of the bstring b.) If - * b is detectably invalid, then NULL is returned. The section described + * b is detectably invalid, then NULL is returned. The section described * by (left, len) is clamped to the boundaries of b. 
*/ -bstring bmidstr (const_bstring b, int left, int len) { - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; +bstring bmidstr(const_bstring b, int left, int len) +{ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - if (left < 0) { - len += left; - left = 0; - } + if (left < 0) + { + len += left; + left = 0; + } - if (len > b->slen - left) len = b->slen - left; + if (len > b->slen - left) len = b->slen - left; - if (len <= 0) return bfromcstr (""); - return blk2bstr (b->data + left, len); + if (len <= 0) return bfromcstr(""); + return blk2bstr(b->data + left, len); } /* int bdelete (bstring b, int pos, int len) * - * Removes characters from pos to pos+len-1 inclusive and shifts the tail of - * the bstring starting from pos+len to pos. len must be positive for this - * call to have any effect. The section of the string described by (pos, + * Removes characters from pos to pos+len-1 inclusive and shifts the tail of + * the bstring starting from pos+len to pos. len must be positive for this + * call to have any effect. The section of the string described by (pos, * len) is clamped to boundaries of the bstring b. 
*/ -int bdelete (bstring b, int pos, int len) { - /* Clamp to left side of bstring */ - if (pos < 0) { - len += pos; - pos = 0; - } - - if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || - b->mlen < b->slen || b->mlen <= 0) - return BSTR_ERR; - if (len > 0 && pos < b->slen) { - if (pos + len >= b->slen) { - b->slen = pos; - } else { - bBlockCopy ((char *) (b->data + pos), - (char *) (b->data + pos + len), - b->slen - (pos+len)); - b->slen -= len; - } - b->data[b->slen] = (unsigned char) '\0'; - } - return BSTR_OK; +int bdelete(bstring b, int pos, int len) +{ + /* Clamp to left side of bstring */ + if (pos < 0) + { + len += pos; + pos = 0; + } + + if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || + b->mlen < b->slen || b->mlen <= 0) + return BSTR_ERR; + if (len > 0 && pos < b->slen) + { + if (pos + len >= b->slen) + { + b->slen = pos; + } + else + { + bBlockCopy((char *)(b->data + pos), + (char *)(b->data + pos + len), + b->slen - (pos + len)); + b->slen -= len; + } + b->data[b->slen] = (unsigned char)'\0'; + } + return BSTR_OK; } /* int bdestroy (bstring b) * * Free up the bstring. Note that if b is detectably invalid or not writable * then no action is performed and BSTR_ERR is returned. Like a freed memory - * allocation, dereferences, writes or any other action on b after it has + * allocation, dereferences, writes or any other action on b after it has * been bdestroyed is undefined. */ -int bdestroy (bstring b) { - if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || - b->data == NULL) - return BSTR_ERR; +int bdestroy(bstring b) +{ + if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || + b->data == NULL) + return BSTR_ERR; - bstr__free (b->data); + bstr__free(b->data); - /* In case there is any stale usage, there is one more chance to - notice this error. */ + /* In case there is any stale usage, there is one more chance to + notice this error. 
*/ - b->slen = -1; - b->mlen = -__LINE__; - b->data = NULL; + b->slen = -1; + b->mlen = -__LINE__; + b->data = NULL; - bstr__free (b); - return BSTR_OK; + bstr__free(b); + return BSTR_OK; } /* int binstr (const_bstring b1, int pos, const_bstring b2) * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where * this can take much longer than it needs to. */ -int binstr (const_bstring b1, int pos, const_bstring b2) { -int j, ii, ll, lf; -unsigned char * d0; -unsigned char c0; -register unsigned char * d1; -register unsigned char c1; -register int i; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* No space to find such a string? 
*/ - if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return 0; - - i = pos; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - /* Peel off the b2->slen == 1 case */ - c0 = d0[0]; - if (1 == ll) { - for (;i < lf; i++) if (c0 == d1[i]) return i; - return BSTR_ERR; - } - - c1 = c0; - j = 0; - lf = b1->slen - 1; - - ii = -1; - if (i < lf) do { - /* Unrolled current character test */ - if (c1 != d1[i]) { - if (c1 != d1[1+i]) { - i += 2; - continue; - } - i++; - } - - /* Take note if this is the start of a potential match */ - if (0 == j) ii = i; - - /* Shift the test character down by one */ - j++; - i++; - - /* If this isn't past the last character continue */ - if (j < ll) { - c1 = d0[j]; - continue; - } - - N0:; - - /* If no characters mismatched, then we matched */ - if (i == ii+j) return ii; - - /* Shift back to the beginning */ - i -= j; - j = 0; - c1 = c0; - } while (i < lf); - - /* Deal with last case if unrolling caused a misalignment */ - if (i == lf && ll == j+1 && c1 == d1[i]) goto N0; - - return BSTR_ERR; +int binstr(const_bstring b1, int pos, const_bstring b2) +{ + int j, ii, ll, lf; + unsigned char * d0; + unsigned char c0; + register unsigned char * d1; + register unsigned char c1; + register int i; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* No space to find such a string? 
*/ + if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return 0; + + i = pos; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + /* Peel off the b2->slen == 1 case */ + c0 = d0[0]; + if (1 == ll) + { + for (; i < lf; i++) + if (c0 == d1[i]) return i; + return BSTR_ERR; + } + + c1 = c0; + j = 0; + lf = b1->slen - 1; + + ii = -1; + if (i < lf) + do + { + /* Unrolled current character test */ + if (c1 != d1[i]) + { + if (c1 != d1[1 + i]) + { + i += 2; + continue; + } + i++; + } + + /* Take note if this is the start of a potential match */ + if (0 == j) ii = i; + + /* Shift the test character down by one */ + j++; + i++; + + /* If this isn't past the last character continue */ + if (j < ll) + { + c1 = d0[j]; + continue; + } + + N0:; + + /* If no characters mismatched, then we matched */ + if (i == ii + j) return ii; + + /* Shift back to the beginning */ + i -= j; + j = 0; + c1 = c0; + } + while (i < lf); + + /* Deal with last case if unrolling caused a misalignment */ + if (i == lf && ll == j + 1 && c1 == d1[i]) goto N0; + + return BSTR_ERR; } /* int binstrr (const_bstring b1, int pos, const_bstring b2) * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. 
Because of this there are many degenerate cases where * this can take much longer than it needs to. */ -int binstrr (const_bstring b1, int pos, const_bstring b2) { -int j, i, l; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j]) { - j ++; - if (j >= l) return i; - } else { - i --; - if (i < 0) break; - j=0; - } - } - - return BSTR_ERR; +int binstrr(const_bstring b1, int pos, const_bstring b2) +{ + int j, i, l; + unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) + { + if (d0[j] == d1[i + j]) + { + j++; + if (j >= l) return i; + } + else + { + i--; + if (i < 0) break; + j = 0; + } + } + + return BSTR_ERR; } /* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward but without regard to case. 
If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are * many degenerate cases where this can take much longer than it needs to. */ -int binstrcaseless (const_bstring b1, int pos, const_bstring b2) { -int j, i, l, ll; -unsigned char * d0, * d1; +int binstrcaseless(const_bstring b1, int pos, const_bstring b2) +{ + int j, i, l, ll; + unsigned char * d0, * d1; - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; - l = b1->slen - b2->slen + 1; + l = b1->slen - b2->slen + 1; - /* No space to find such a string? */ - if (l <= pos) return BSTR_ERR; + /* No space to find such a string? 
*/ + if (l <= pos) return BSTR_ERR; - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return BSTR_OK; + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return BSTR_OK; - i = pos; - j = 0; + i = pos; + j = 0; - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; - for (;;) { - if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { - j ++; - if (j >= ll) return i; - } else { - i ++; - if (i >= l) break; - j=0; - } - } + for (;;) + { + if (d0[j] == d1[i + j] || downcase(d0[j]) == downcase(d1[i + j])) + { + j++; + if (j >= ll) return i; + } + else + { + i++; + if (i >= l) break; + j = 0; + } + } - return BSTR_ERR; + return BSTR_ERR; } /* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are * many degenerate cases where this can take much longer than it needs to. 
*/ -int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) { -int j, i, l; -unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) { - if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) { - j ++; - if (j >= l) return i; - } else { - i --; - if (i < 0) break; - j=0; - } - } - - return BSTR_ERR; +int binstrrcaseless(const_bstring b1, int pos, const_bstring b2) +{ + int j, i, l; + unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) + { + if (d0[j] == d1[i + j] || downcase(d0[j]) == downcase(d1[i + j])) + { + j++; + if (j >= l) return i; + } + else + { + i--; + if (i < 0) break; + j = 0; + } + } + + return BSTR_ERR; } - /* int bstrchrp (const_bstring b, int c, int pos) * - * Search for the character c in b forwards from the position pos + * Search for the character c in b forwards from the position pos * (inclusive). 
*/ -int bstrchrp (const_bstring b, int c, int pos) { -unsigned char * p; +int bstrchrp(const_bstring b, int c, int pos) +{ + unsigned char * p; - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos)); - if (p) return (int) (p - b->data); - return BSTR_ERR; + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + p = (unsigned char *)bstr__memchr((b->data + pos), (unsigned char)c, (b->slen - pos)); + if (p) return (int)(p - b->data); + return BSTR_ERR; } /* int bstrrchrp (const_bstring b, int c, int pos) * - * Search for the character c in b backwards from the position pos in string + * Search for the character c in b backwards from the position pos in string * (inclusive). */ -int bstrrchrp (const_bstring b, int c, int pos) { -int i; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - for (i=pos; i >= 0; i--) { - if (b->data[i] == (unsigned char) c) return i; - } - return BSTR_ERR; +int bstrrchrp(const_bstring b, int c, int pos) +{ + int i; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + for (i = pos; i >= 0; i--) + { + if (b->data[i] == (unsigned char)c) return i; + } + return BSTR_ERR; } -#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) +#if !defined(BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) #define LONG_LOG_BITS_QTY (3) #define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) #define LONG_TYPE unsigned char #define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) struct charField { LONG_TYPE content[CFCLEN]; }; -#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) -#define setInCharField(cf,idx) { \ - unsigned int c = (unsigned int) (idx); \ - (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ +#define testInCharField(cf, c) ((cf)->content[(c) >> 
LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) +#define setInCharField(cf, idx) {\ + unsigned int c = (unsigned int) (idx); \ + (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ } #else #define CFCLEN (1 << CHAR_BIT) struct charField { unsigned char content[CFCLEN]; }; -#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)]) -#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0 +#define testInCharField(cf, c) ((cf)->content[(unsigned char) (c)]) +#define setInCharField(cf, idx) (cf)->content[(unsigned int) (idx)] = ~0 #endif /* Convert a bstring to charField */ -static int buildCharField (struct charField * cf, const_bstring b) { -int i; - if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; - memset ((void *) cf->content, 0, sizeof (struct charField)); - for (i=0; i < b->slen; i++) { - setInCharField (cf, b->data[i]); - } - return BSTR_OK; +static int buildCharField(struct charField * cf, const_bstring b) +{ + int i; + if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; + memset((void *)cf->content, 0, sizeof(struct charField)); + for (i = 0; i < b->slen; i++) + { + setInCharField(cf, b->data[i]); + } + return BSTR_OK; } -static void invertCharField (struct charField * cf) { -int i; - for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i]; +static void invertCharField(struct charField * cf) +{ + int i; + for (i = 0; i < CFCLEN; i++) + cf->content[i] = ~cf->content[i]; } /* Inner engine for binchr */ -static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) { -int i; - for (i=pos; i < len; i++) { - unsigned char c = (unsigned char) data[i]; - if (testInCharField (cf, c)) return i; - } - return BSTR_ERR; +static int binchrCF(const unsigned char * data, int len, int pos, const struct charField * cf) +{ + int i; + for (i = pos; i < len; i++) + { + unsigned char c = (unsigned char)data[i]; + if 
(testInCharField(cf, c)) return i; + } + return BSTR_ERR; } /* int binchr (const_bstring b0, int pos, const_bstring b1); * - * Search for the first position in b0 starting from pos or after, in which - * one of the characters in b1 is found and return it. If such a position + * Search for the first position in b0 starting from pos or after, in which + * one of the characters in b1 is found and return it. If such a position * does not exist in b0, then BSTR_ERR is returned. */ -int binchr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos); - if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; - return binchrCF (b0->data, b0->slen, pos, &chrs); +int binchr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (1 == b1->slen) return bstrchrp(b0, b1->data[0], pos); + if (0 > buildCharField(&chrs, b1)) return BSTR_ERR; + return binchrCF(b0->data, b0->slen, pos, &chrs); } /* Inner engine for binchrr */ -static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) { -int i; - for (i=pos; i >= 0; i--) { - unsigned int c = (unsigned int) data[i]; - if (testInCharField (cf, c)) return i; - } - return BSTR_ERR; +static int binchrrCF(const unsigned char * data, int pos, const struct charField * cf) +{ + int i; + for (i = pos; i >= 0; i--) + { + unsigned int c = (unsigned int)data[i]; + if (testInCharField(cf, c)) return i; + } + return BSTR_ERR; } /* int binchrr (const_bstring b0, int pos, const_bstring b1); * - * Search for the last position in b0 no greater than pos, in which one of - * the characters in b1 is found and return it. 
If such a position does not + * Search for the last position in b0 no greater than pos, in which one of + * the characters in b1 is found and return it. If such a position does not * exist in b0, then BSTR_ERR is returned. */ -int binchrr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos); - if (0 > buildCharField (&chrs, b1)) return BSTR_ERR; - return binchrrCF (b0->data, pos, &chrs); +int binchrr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (1 == b1->slen) return bstrrchrp(b0, b1->data[0], pos); + if (0 > buildCharField(&chrs, b1)) return BSTR_ERR; + return binchrrCF(b0->data, pos, &chrs); } /* int bninchr (const_bstring b0, int pos, const_bstring b1); * - * Search for the first position in b0 starting from pos or after, in which - * none of the characters in b1 is found and return it. If such a position + * Search for the first position in b0 starting from pos or after, in which + * none of the characters in b1 is found and return it. If such a position * does not exist in b0, then BSTR_ERR is returned. 
*/ -int bninchr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; - invertCharField (&chrs); - return binchrCF (b0->data, b0->slen, pos, &chrs); +int bninchr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (buildCharField(&chrs, b1) < 0) return BSTR_ERR; + invertCharField(&chrs); + return binchrCF(b0->data, b0->slen, pos, &chrs); } /* int bninchrr (const_bstring b0, int pos, const_bstring b1); * - * Search for the last position in b0 no greater than pos, in which none of - * the characters in b1 is found and return it. If such a position does not + * Search for the last position in b0 no greater than pos, in which none of + * the characters in b1 is found and return it. If such a position does not * exist in b0, then BSTR_ERR is returned. */ -int bninchrr (const_bstring b0, int pos, const_bstring b1) { -struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (buildCharField (&chrs, b1) < 0) return BSTR_ERR; - invertCharField (&chrs); - return binchrrCF (b0->data, pos, &chrs); +int bninchrr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (buildCharField(&chrs, b1) < 0) return BSTR_ERR; + invertCharField(&chrs); + return binchrrCF(b0->data, pos, &chrs); } /* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) * - * Overwrite the string b0 starting at position pos with the string b1. If - * the position pos is past the end of b0, then the character "fill" is + * Overwrite the string b0 starting at position pos with the string b1. 
If + * the position pos is past the end of b0, then the character "fill" is * appended as necessary to make up the gap between the end of b0 and pos. * If b1 is NULL, it behaves as if it were a 0-length string. */ -int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) { -int d, newlen; -ptrdiff_t pd; -bstring aux = (bstring) b1; +int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill) +{ + int d, newlen; + ptrdiff_t pd; + bstring aux = (bstring)b1; - if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || - b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; - if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; + if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || + b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; + if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; - d = pos; + d = pos; - /* Aliasing case */ - if (NULL != aux) { - if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) { - if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR; - } - d += aux->slen; - } + /* Aliasing case */ + if (NULL != aux) + { + if ((pd = (ptrdiff_t)(b1->data - b0->data)) >= 0 && pd < (ptrdiff_t)b0->mlen) + { + if (NULL == (aux = bstrcpy(b1))) return BSTR_ERR; + } + d += aux->slen; + } - /* Increase memory size if necessary */ - if (balloc (b0, d + 1) != BSTR_OK) { - if (aux != b1) bdestroy (aux); - return BSTR_ERR; - } + /* Increase memory size if necessary */ + if (balloc(b0, d + 1) != BSTR_OK) + { + if (aux != b1) bdestroy(aux); + return BSTR_ERR; + } - newlen = b0->slen; + newlen = b0->slen; - /* Fill in "fill" character as necessary */ - if (pos > newlen) { - bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen)); - newlen = pos; - } + /* Fill in "fill" character as necessary */ + if (pos > newlen) + { + bstr__memset(b0->data + b0->slen, (int)fill, (size_t)(pos - b0->slen)); + newlen = pos; + } - /* Copy b1 to position pos in b0. 
*/ - if (aux != NULL) { - bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen); - if (aux != b1) bdestroy (aux); - } + /* Copy b1 to position pos in b0. */ + if (aux != NULL) + { + bBlockCopy((char *)(b0->data + pos), (char *)aux->data, aux->slen); + if (aux != b1) bdestroy(aux); + } - /* Indicate the potentially increased size of b0 */ - if (d > newlen) newlen = d; + /* Indicate the potentially increased size of b0 */ + if (d > newlen) newlen = d; - b0->slen = newlen; - b0->data[newlen] = (unsigned char) '\0'; + b0->slen = newlen; + b0->data[newlen] = (unsigned char)'\0'; - return BSTR_OK; + return BSTR_OK; } /* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) * - * Inserts the string b2 into b1 at position pos. If the position pos is - * past the end of b1, then the character "fill" is appended as necessary to + * Inserts the string b2 into b1 at position pos. If the position pos is + * past the end of b1, then the character "fill" is appended as necessary to * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert * does not allow b2 to be NULL. 
*/ -int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) { -int d, l; -ptrdiff_t pd; -bstring aux = (bstring) b2; - - if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || - b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; - - /* Aliasing case */ - if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) { - if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; - } - - /* Compute the two possible end pointers */ - d = b1->slen + aux->slen; - l = pos + aux->slen; - if ((d|l) < 0) return BSTR_ERR; - - if (l > d) { - /* Inserting past the end of the string */ - if (balloc (b1, l + 1) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen)); - b1->slen = l; - } else { - /* Inserting in the middle of the string */ - if (balloc (b1, d + 1) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - bBlockCopy (b1->data + l, b1->data + pos, d - l); - b1->slen = d; - } - bBlockCopy (b1->data + pos, aux->data, aux->slen); - b1->data[b1->slen] = (unsigned char) '\0'; - if (aux != b2) bdestroy (aux); - return BSTR_OK; -} - -/* int breplace (bstring b1, int pos, int len, bstring b2, +int binsert(bstring b1, int pos, const_bstring b2, unsigned char fill) +{ + int d, l; + ptrdiff_t pd; + bstring aux = (bstring)b2; + + if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || + b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; + + /* Aliasing case */ + if ((pd = (ptrdiff_t)(b2->data - b1->data)) >= 0 && pd < (ptrdiff_t)b1->mlen) + { + if (NULL == (aux = bstrcpy(b2))) return BSTR_ERR; + } + + /* Compute the two possible end pointers */ + d = b1->slen + aux->slen; + l = pos + aux->slen; + if ((d | l) < 0) return BSTR_ERR; + + if (l > d) + { + /* Inserting past the end of the string */ + if (balloc(b1, l + 1) != BSTR_OK) + { + if (aux != b2) bdestroy(aux); + return BSTR_ERR; + } + 
bstr__memset(b1->data + b1->slen, (int)fill, (size_t)(pos - b1->slen)); + b1->slen = l; + } + else + { + /* Inserting in the middle of the string */ + if (balloc(b1, d + 1) != BSTR_OK) + { + if (aux != b2) bdestroy(aux); + return BSTR_ERR; + } + bBlockCopy(b1->data + l, b1->data + pos, d - l); + b1->slen = d; + } + bBlockCopy(b1->data + pos, aux->data, aux->slen); + b1->data[b1->slen] = (unsigned char)'\0'; + if (aux != b2) bdestroy(aux); + return BSTR_OK; +} + +/* int breplace (bstring b1, int pos, int len, bstring b2, * unsigned char fill) * * Replace a section of a string from pos for a length len with the string b2. * fill is used is pos > b1->slen. */ -int breplace (bstring b1, int pos, int len, const_bstring b2, - unsigned char fill) { -int pl, ret; -ptrdiff_t pd; -bstring aux = (bstring) b2; - - if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || - b2 == NULL || b1->data == NULL || b2->data == NULL || - b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || - b1->mlen <= 0) return BSTR_ERR; - - /* Straddles the end? 
*/ - if (pl >= b1->slen) { - if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret; - if (pos + b2->slen < b1->slen) { - b1->slen = pos + b2->slen; - b1->data[b1->slen] = (unsigned char) '\0'; - } - return ret; - } - - /* Aliasing case */ - if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) { - if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR; - } - - if (aux->slen > len) { - if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) { - if (aux != b2) bdestroy (aux); - return BSTR_ERR; - } - } - - if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); - bstr__memcpy (b1->data + pos, aux->data, aux->slen); - b1->slen += aux->slen - len; - b1->data[b1->slen] = (unsigned char) '\0'; - if (aux != b2) bdestroy (aux); - return BSTR_OK; -} - -/* - * findreplaceengine is used to implement bfindreplace and +int breplace(bstring b1, int pos, int len, const_bstring b2, + unsigned char fill) +{ + int pl, ret; + ptrdiff_t pd; + bstring aux = (bstring)b2; + + if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || + b2 == NULL || b1->data == NULL || b2->data == NULL || + b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || + b1->mlen <= 0) return BSTR_ERR; + + /* Straddles the end? 
*/ + if (pl >= b1->slen) + { + if ((ret = bsetstr(b1, pos, b2, fill)) < 0) return ret; + if (pos + b2->slen < b1->slen) + { + b1->slen = pos + b2->slen; + b1->data[b1->slen] = (unsigned char)'\0'; + } + return ret; + } + + /* Aliasing case */ + if ((pd = (ptrdiff_t)(b2->data - b1->data)) >= 0 && pd < (ptrdiff_t)b1->slen) + { + if (NULL == (aux = bstrcpy(b2))) return BSTR_ERR; + } + + if (aux->slen > len) + { + if (balloc(b1, b1->slen + aux->slen - len) != BSTR_OK) + { + if (aux != b2) bdestroy(aux); + return BSTR_ERR; + } + } + + if (aux->slen != len) bstr__memmove(b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); + bstr__memcpy(b1->data + pos, aux->data, aux->slen); + b1->slen += aux->slen - len; + b1->data[b1->slen] = (unsigned char)'\0'; + if (aux != b2) bdestroy(aux); + return BSTR_OK; +} + +/* + * findreplaceengine is used to implement bfindreplace and * bfindreplacecaseless. It works by breaking the three cases of * expansion, reduction and replacement, and solving each of these * in the most efficient way possible. @@ -1546,408 +1703,452 @@ typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); #define INITIAL_STATIC_FIND_INDEX_COUNT 32 -static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) { -int i, ret, slen, mlen, delta, acc; -int * d; -int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. 
*/ -ptrdiff_t pd; -bstring auxf = (bstring) find; -bstring auxr = (bstring) repl; - - if (b == NULL || b->data == NULL || find == NULL || - find->data == NULL || repl == NULL || repl->data == NULL || - pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || - b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; - if (pos > b->slen - find->slen) return BSTR_OK; - - /* Alias with find string */ - pd = (ptrdiff_t) (find->data - b->data); - if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) { - if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR; - } - - /* Alias with repl string */ - pd = (ptrdiff_t) (repl->data - b->data); - if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) { - if (NULL == (auxr = bstrcpy (repl))) { - if (auxf != find) bdestroy (auxf); - return BSTR_ERR; - } - } - - delta = auxf->slen - auxr->slen; - - /* in-place replacement since find and replace strings are of equal - length */ - if (delta == 0) { - while ((pos = instr (b, pos, auxf)) >= 0) { - bstr__memcpy (b->data + pos, auxr->data, auxr->slen); - pos += auxf->slen; - } - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return BSTR_OK; - } - - /* shrinking replacement since auxf->slen > auxr->slen */ - if (delta > 0) { - acc = 0; - - while ((i = instr (b, pos, auxf)) >= 0) { - if (acc && i > pos) - bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); - if (auxr->slen) - bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen); - acc += delta; - pos = i + auxf->slen; - } - - if (acc) { - i = b->slen; - if (i > pos) - bstr__memmove (b->data + pos - acc, b->data + pos, i - pos); - b->slen -= acc; - b->data[b->slen] = (unsigned char) '\0'; - } - - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return BSTR_OK; - } - - /* expanding replacement since find->slen < repl->slen. Its a lot - more complicated. 
This works by first finding all the matches and - storing them to a growable array, then doing at most one resize of - the destination bstring and then performing the direct memory transfers - of the string segment pieces to form the final result. The growable - array of matches uses a deferred doubling reallocing strategy. What - this means is that it starts as a reasonably fixed sized auto array in - the hopes that many if not most cases will never need to grow this - array. But it switches as soon as the bounds of the array will be - exceeded. An extra find result is always appended to this array that - corresponds to the end of the destination string, so slen is checked - against mlen - 1 rather than mlen before resizing. - */ - - mlen = INITIAL_STATIC_FIND_INDEX_COUNT; - d = (int *) static_d; /* Avoid malloc for trivial/initial cases */ - acc = slen = 0; - - while ((pos = instr (b, pos, auxf)) >= 0) { - if (slen >= mlen - 1) { - int sl, *t; - - mlen += mlen; - sl = sizeof (int *) * mlen; - if (static_d == d) d = NULL; /* static_d cannot be realloced */ - if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) { - ret = BSTR_ERR; - goto done; - } - if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d)); - d = t; - } - d[slen] = pos; - slen++; - acc -= delta; - pos += auxf->slen; - if (pos < 0 || acc < 0) { - ret = BSTR_ERR; - goto done; - } - } - - /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ - d[slen] = b->slen; - - if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) { - b->slen += acc; - for (i = slen-1; i >= 0; i--) { - int s, l; - s = d[i] + auxf->slen; - l = d[i+1] - s; /* d[slen] may be accessed here. 
*/ - if (l) { - bstr__memmove (b->data + s + acc, b->data + s, l); - } - if (auxr->slen) { - bstr__memmove (b->data + s + acc - auxr->slen, - auxr->data, auxr->slen); - } - acc += delta; - } - b->data[b->slen] = (unsigned char) '\0'; - } - - done:; - if (static_d == d) d = NULL; - bstr__free (d); - if (auxf != find) bdestroy (auxf); - if (auxr != repl) bdestroy (auxr); - return ret; -} - -/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, +static int findreplaceengine(bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) +{ + int i, ret, slen, mlen, delta, acc; + int * d; + int static_d[INITIAL_STATIC_FIND_INDEX_COUNT + 1]; /* This +1 is unnecessary, but it shuts up LINT. */ + ptrdiff_t pd; + bstring auxf = (bstring)find; + bstring auxr = (bstring)repl; + + if (b == NULL || b->data == NULL || find == NULL || + find->data == NULL || repl == NULL || repl->data == NULL || + pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || + b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; + if (pos > b->slen - find->slen) return BSTR_OK; + + /* Alias with find string */ + pd = (ptrdiff_t)(find->data - b->data); + if ((ptrdiff_t)(pos - find->slen) < pd && pd < (ptrdiff_t)b->slen) + { + if (NULL == (auxf = bstrcpy(find))) return BSTR_ERR; + } + + /* Alias with repl string */ + pd = (ptrdiff_t)(repl->data - b->data); + if ((ptrdiff_t)(pos - repl->slen) < pd && pd < (ptrdiff_t)b->slen) + { + if (NULL == (auxr = bstrcpy(repl))) + { + if (auxf != find) bdestroy(auxf); + return BSTR_ERR; + } + } + + delta = auxf->slen - auxr->slen; + + /* in-place replacement since find and replace strings are of equal + length */ + if (delta == 0) + { + while ((pos = instr(b, pos, auxf)) >= 0) + { + bstr__memcpy(b->data + pos, auxr->data, auxr->slen); + pos += auxf->slen; + } + if (auxf != find) bdestroy(auxf); + if (auxr != repl) bdestroy(auxr); + return BSTR_OK; + } + + /* shrinking replacement since auxf->slen > auxr->slen */ + 
if (delta > 0) + { + acc = 0; + + while ((i = instr(b, pos, auxf)) >= 0) + { + if (acc && i > pos) + bstr__memmove(b->data + pos - acc, b->data + pos, i - pos); + if (auxr->slen) + bstr__memcpy(b->data + i - acc, auxr->data, auxr->slen); + acc += delta; + pos = i + auxf->slen; + } + + if (acc) + { + i = b->slen; + if (i > pos) + bstr__memmove(b->data + pos - acc, b->data + pos, i - pos); + b->slen -= acc; + b->data[b->slen] = (unsigned char)'\0'; + } + + if (auxf != find) bdestroy(auxf); + if (auxr != repl) bdestroy(auxr); + return BSTR_OK; + } + + /* expanding replacement since find->slen < repl->slen. Its a lot + more complicated. This works by first finding all the matches and + storing them to a growable array, then doing at most one resize of + the destination bstring and then performing the direct memory transfers + of the string segment pieces to form the final result. The growable + array of matches uses a deferred doubling reallocing strategy. What + this means is that it starts as a reasonably fixed sized auto array in + the hopes that many if not most cases will never need to grow this + array. But it switches as soon as the bounds of the array will be + exceeded. An extra find result is always appended to this array that + corresponds to the end of the destination string, so slen is checked + against mlen - 1 rather than mlen before resizing. 
+ */ + + mlen = INITIAL_STATIC_FIND_INDEX_COUNT; + d = (int *)static_d; /* Avoid malloc for trivial/initial cases */ + acc = slen = 0; + + while ((pos = instr(b, pos, auxf)) >= 0) + { + if (slen >= mlen - 1) + { + int sl, *t; + + mlen += mlen; + sl = sizeof(int *) * mlen; + if (static_d == d) d = NULL; /* static_d cannot be realloced */ + if (mlen <= 0 || sl < mlen || NULL == (t = (int *)bstr__realloc(d, sl))) + { + ret = BSTR_ERR; + goto done; + } + if (NULL == d) bstr__memcpy(t, static_d, sizeof(static_d)); + d = t; + } + d[slen] = pos; + slen++; + acc -= delta; + pos += auxf->slen; + if (pos < 0 || acc < 0) + { + ret = BSTR_ERR; + goto done; + } + } + + /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ + d[slen] = b->slen; + + if (BSTR_OK == (ret = balloc(b, b->slen + acc + 1))) + { + b->slen += acc; + for (i = slen - 1; i >= 0; i--) + { + int s, l; + s = d[i] + auxf->slen; + l = d[i + 1] - s; /* d[slen] may be accessed here. */ + if (l) + { + bstr__memmove(b->data + s + acc, b->data + s, l); + } + if (auxr->slen) + { + bstr__memmove(b->data + s + acc - auxr->slen, + auxr->data, auxr->slen); + } + acc += delta; + } + b->data[b->slen] = (unsigned char)'\0'; + } + +done:; + if (static_d == d) d = NULL; + bstr__free(d); + if (auxf != find) bdestroy(auxf); + if (auxr != repl) bdestroy(auxr); + return ret; +} + +/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, * int pos) * * Replace all occurrences of a find string with a replace string after a * given point in a bstring. 
*/ -int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) { - return findreplaceengine (b, find, repl, pos, binstr); +int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos) +{ + return findreplaceengine(b, find, repl, pos, binstr); } -/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, +/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, * int pos) * - * Replace all occurrences of a find string, ignoring case, with a replace + * Replace all occurrences of a find string, ignoring case, with a replace * string after a given point in a bstring. */ -int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) { - return findreplaceengine (b, find, repl, pos, binstrcaseless); +int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos) +{ + return findreplaceengine(b, find, repl, pos, binstrcaseless); } /* int binsertch (bstring b, int pos, int len, unsigned char fill) * - * Inserts the character fill repeatedly into b at position pos for a - * length len. If the position pos is past the end of b, then the - * character "fill" is appended as necessary to make up the gap between the + * Inserts the character fill repeatedly into b at position pos for a + * length len. If the position pos is past the end of b, then the + * character "fill" is appended as necessary to make up the gap between the * end of b and the position pos + len. 
*/ -int binsertch (bstring b, int pos, int len, unsigned char fill) { -int d, l, i; - - if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || len < 0) return BSTR_ERR; - - /* Compute the two possible end pointers */ - d = b->slen + len; - l = pos + len; - if ((d|l) < 0) return BSTR_ERR; - - if (l > d) { - /* Inserting past the end of the string */ - if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR; - pos = b->slen; - b->slen = l; - } else { - /* Inserting in the middle of the string */ - if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR; - for (i = d - 1; i >= l; i--) { - b->data[i] = b->data[i - len]; - } - b->slen = d; - } - - for (i=pos; i < l; i++) b->data[i] = fill; - b->data[b->slen] = (unsigned char) '\0'; - return BSTR_OK; +int binsertch(bstring b, int pos, int len, unsigned char fill) +{ + int d, l, i; + + if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || len < 0) return BSTR_ERR; + + /* Compute the two possible end pointers */ + d = b->slen + len; + l = pos + len; + if ((d | l) < 0) return BSTR_ERR; + + if (l > d) + { + /* Inserting past the end of the string */ + if (balloc(b, l + 1) != BSTR_OK) return BSTR_ERR; + pos = b->slen; + b->slen = l; + } + else + { + /* Inserting in the middle of the string */ + if (balloc(b, d + 1) != BSTR_OK) return BSTR_ERR; + for (i = d - 1; i >= l; i--) + { + b->data[i] = b->data[i - len]; + } + b->slen = d; + } + + for (i = pos; i < l; i++) + b->data[i] = fill; + b->data[b->slen] = (unsigned char)'\0'; + return BSTR_OK; } /* int bpattern (bstring b, int len) * - * Replicate the bstring, b in place, end to end repeatedly until it - * surpasses len characters, then chop the result to exactly len characters. - * This function operates in-place. The function will return with BSTR_ERR + * Replicate the bstring, b in place, end to end repeatedly until it + * surpasses len characters, then chop the result to exactly len characters. + * This function operates in-place. 
The function will return with BSTR_ERR * if b is NULL or of length 0, otherwise BSTR_OK is returned. */ -int bpattern (bstring b, int len) { -int i, d; +int bpattern(bstring b, int len) +{ + int i, d; - d = blength (b); - if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR; - if (len > 0) { - if (d == 1) return bsetstr (b, len, NULL, b->data[0]); - for (i = d; i < len; i++) b->data[i] = b->data[i - d]; - } - b->data[len] = (unsigned char) '\0'; - b->slen = len; - return BSTR_OK; + d = blength(b); + if (d <= 0 || len < 0 || balloc(b, len + 1) != BSTR_OK) return BSTR_ERR; + if (len > 0) + { + if (d == 1) return bsetstr(b, len, NULL, b->data[0]); + for (i = d; i < len; i++) + b->data[i] = b->data[i - d]; + } + b->data[len] = (unsigned char)'\0'; + b->slen = len; + return BSTR_OK; } #define BS_BUFF_SZ (1024) /* int breada (bstring b, bNread readPtr, void * parm) * - * Use a finite buffer fread-like function readPtr to concatenate to the - * bstring b the entire contents of file-like source data in a roughly + * Use a finite buffer fread-like function readPtr to concatenate to the + * bstring b the entire contents of file-like source data in a roughly * efficient way. */ -int breada (bstring b, bNread readPtr, void * parm) { -int i, l, n; +int breada(bstring b, bNread readPtr, void * parm) +{ + int i, l, n; - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; - i = b->slen; - for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? n : BS_BUFF_SZ)) { - if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR; - l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm); - i += l; - b->slen = i; - if (i < n) break; - } + i = b->slen; + for (n = i + 16;; n += ((n < BS_BUFF_SZ) ? 
n : BS_BUFF_SZ)) + { + if (BSTR_OK != balloc(b, n + 1)) return BSTR_ERR; + l = (int)readPtr((void *)(b->data + i), 1, n - i, parm); + i += l; + b->slen = i; + if (i < n) break; + } - b->data[i] = (unsigned char) '\0'; - return BSTR_OK; + b->data[i] = (unsigned char)'\0'; + return BSTR_OK; } /* bstring bread (bNread readPtr, void * parm) * - * Use a finite buffer fread-like function readPtr to create a bstring - * filled with the entire contents of file-like source data in a roughly + * Use a finite buffer fread-like function readPtr to create a bstring + * filled with the entire contents of file-like source data in a roughly * efficient way. */ -bstring bread (bNread readPtr, void * parm) { -bstring buff; +bstring bread(bNread readPtr, void * parm) +{ + bstring buff; - if (0 > breada (buff = bfromcstr (""), readPtr, parm)) { - bdestroy (buff); - return NULL; - } - return buff; + if (0 > breada(buff = bfromcstr(""), readPtr, parm)) + { + bdestroy(buff); + return NULL; + } + return buff; } /* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) * - * Use an fgetc-like single character stream reading function (getcPtr) to + * Use an fgetc-like single character stream reading function (getcPtr) to * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator + * bstring b. The stream read is terminated by the passed in terminator * parameter. * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the * function returns with a partial result in b. If there is an empty partial - * result, 1 is returned. If no characters are read, or there is some other + * result, 1 is returned. 
If no characters are read, or there is some other * detectable error, BSTR_ERR is returned. */ -int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) { -int c, d, e; +int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator) +{ + int c, d, e; - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = 0; - e = b->mlen - 2; + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = 0; + e = b->mlen - 2; - while ((c = getcPtr (parm)) >= 0) { - if (d > e) { - b->slen = d; - if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char) c; - d++; - if (c == terminator) break; - } + while ((c = getcPtr(parm)) >= 0) + { + if (d > e) + { + b->slen = d; + if (balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char)c; + d++; + if (c == terminator) break; + } - b->data[d] = (unsigned char) '\0'; - b->slen = d; + b->data[d] = (unsigned char)'\0'; + b->slen = d; - return d == 0 && c < 0; + return d == 0 && c < 0; } /* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) * - * Use an fgetc-like single character stream reading function (getcPtr) to + * Use an fgetc-like single character stream reading function (getcPtr) to * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator + * bstring b. The stream read is terminated by the passed in terminator * parameter. * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result concatentated to b. If there is - * an empty partial result, 1 is returned. 
If no characters are read, or + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result concatentated to b. If there is + * an empty partial result, 1 is returned. If no characters are read, or * there is some other detectable error, BSTR_ERR is returned. */ -int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) { -int c, d, e; +int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator) +{ + int c, d, e; - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = b->slen; - e = b->mlen - 2; + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = b->slen; + e = b->mlen - 2; - while ((c = getcPtr (parm)) >= 0) { - if (d > e) { - b->slen = d; - if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char) c; - d++; - if (c == terminator) break; - } + while ((c = getcPtr(parm)) >= 0) + { + if (d > e) + { + b->slen = d; + if (balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char)c; + d++; + if (c == terminator) break; + } - b->data[d] = (unsigned char) '\0'; - b->slen = d; + b->data[d] = (unsigned char)'\0'; + b->slen = d; - return d == 0 && c < 0; + return d == 0 && c < 0; } /* bstring bgets (bNgetc getcPtr, void * parm, char terminator) * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated into a bstring. + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated into a bstring. * The stream read is terminated by the passed in terminator function. 
* - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * result obtained thus far is returned. If no characters are read, or + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * result obtained thus far is returned. If no characters are read, or * there is some other detectable error, NULL is returned. */ -bstring bgets (bNgetc getcPtr, void * parm, char terminator) { -bstring buff; - - if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || 0 >= buff->slen) { - bdestroy (buff); - buff = NULL; - } - return buff; -} - -struct bStream { - bstring buff; /* Buffer for over-reads */ - void * parm; /* The stream handle for core stream */ - bNread readFnPtr; /* fread compatible fnptr for core stream */ - int isEOF; /* track file's EOF state */ - int maxBuffSz; +bstring bgets(bNgetc getcPtr, void * parm, char terminator) +{ + bstring buff; + + if (0 > bgetsa(buff = bfromcstr(""), getcPtr, parm, terminator) || 0 >= buff->slen) + { + bdestroy(buff); + buff = NULL; + } + return buff; +} + +struct bStream +{ + bstring buff; /* Buffer for over-reads */ + void * parm; /* The stream handle for core stream */ + bNread readFnPtr; /* fread compatible fnptr for core stream */ + int isEOF; /* track file's EOF state */ + int maxBuffSz; }; /* struct bStream * bsopen (bNread readPtr, void * parm) * - * Wrap a given open stream (described by a fread compatible function - * pointer and stream handle) into an open bStream suitable for the bstring + * Wrap a given open stream (described by a fread compatible function + * pointer and stream handle) into an open bStream suitable for the bstring * library streaming functions. 
*/ -struct bStream * bsopen (bNread readPtr, void * parm) { -struct bStream * s; +struct bStream * bsopen(bNread readPtr, void * parm) +{ + struct bStream * s; - if (readPtr == NULL) return NULL; - s = (struct bStream *) bstr__alloc (sizeof (struct bStream)); - if (s == NULL) return NULL; - s->parm = parm; - s->buff = bfromcstr (""); - s->readFnPtr = readPtr; - s->maxBuffSz = BS_BUFF_SZ; - s->isEOF = 0; - return s; + if (readPtr == NULL) return NULL; + s = (struct bStream *)bstr__alloc(sizeof(struct bStream)); + if (s == NULL) return NULL; + s->parm = parm; + s->buff = bfromcstr(""); + s->readFnPtr = readPtr; + s->maxBuffSz = BS_BUFF_SZ; + s->isEOF = 0; + return s; } /* int bsbufflength (struct bStream * s, int sz) * - * Set the length of the buffer used by the bStream. If sz is zero, the + * Set the length of the buffer used by the bStream. If sz is zero, the * length is not set. This function returns with the previous length. */ -int bsbufflength (struct bStream * s, int sz) { -int oldSz; - if (s == NULL || sz < 0) return BSTR_ERR; - oldSz = s->maxBuffSz; - if (sz > 0) s->maxBuffSz = sz; - return oldSz; +int bsbufflength(struct bStream * s, int sz) +{ + int oldSz; + if (s == NULL || sz < 0) return BSTR_ERR; + oldSz = s->maxBuffSz; + if (sz > 0) s->maxBuffSz = sz; + return oldSz; } -int bseof (const struct bStream * s) { - if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; - return s->isEOF && (s->buff->slen == 0); +int bseof(const struct bStream * s) +{ + if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; + return s->isEOF && (s->buff->slen == 0); } /* void * bsclose (struct bStream * s) @@ -1955,501 +2156,558 @@ int bseof (const struct bStream * s) { * Close the bStream, and return the handle to the stream that was originally * used to open the given stream. 
*/ -void * bsclose (struct bStream * s) { -void * parm; - if (s == NULL) return NULL; - s->readFnPtr = NULL; - if (s->buff) bdestroy (s->buff); - s->buff = NULL; - parm = s->parm; - s->parm = NULL; - s->isEOF = 1; - bstr__free (s); - return parm; +void * bsclose(struct bStream * s) +{ + void * parm; + if (s == NULL) return NULL; + s->readFnPtr = NULL; + if (s->buff) bdestroy(s->buff); + s->buff = NULL; + parm = s->parm; + s->parm = NULL; + s->isEOF = 1; + bstr__free(s); + return parm; } /* int bsreadlna (bstring r, struct bStream * s, char terminator) * * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. This - * function may read additional characters from the core stream that are not + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not * returned, but will be retained for subsequent read operations. */ -int bsreadlna (bstring r, struct bStream * s, char terminator) { -int i, l, ret, rlo; -char * b; -struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || - r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; - l = s->buff->slen; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) s->buff->data; - x.data = (unsigned char *) b; - - /* First check if the current buffer holds the terminator */ - b[l] = terminator; /* Set sentinel */ - for (i=0; b[i] != terminator; i++) ; - if (i < l) { - x.slen = i + 1; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) { - if (BSTR_OK != balloc (r, 
r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) (r->data + r->slen); - l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); - if (l <= 0) { - r->data[r->slen] = (unsigned char) '\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - b[l] = terminator; /* Set sentinel */ - for (i=0; b[i] != terminator; i++) ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy (s->buff->data, b + i, l - i); - r->data[r->slen] = (unsigned char) '\0'; - return BSTR_OK; +int bsreadlna(bstring r, struct bStream * s, char terminator) +{ + int i, l, ret, rlo; + char * b; + struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || + r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; + l = s->buff->slen; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *)s->buff->data; + x.data = (unsigned char *)b; + + /* First check if the current buffer holds the terminator */ + b[l] = terminator; /* Set sentinel */ + for (i = 0; b[i] != terminator; i++) + ; + if (i < l) + { + x.slen = i + 1; + ret = bconcat(r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete(s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat(r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) + { + if (BSTR_OK != balloc(r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *)(r->data + r->slen); + l = (int)s->readFnPtr(b, 1, s->maxBuffSz, s->parm); + if (l <= 0) + { + r->data[r->slen] = (unsigned char)'\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + b[l] 
= terminator; /* Set sentinel */ + for (i = 0; b[i] != terminator; i++) + ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy(s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char)'\0'; + return BSTR_OK; } /* int bsreadlnsa (bstring r, struct bStream * s, bstring term) * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. - * This function may read additional characters from the core stream that + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. + * This function may read additional characters from the core stream that * are not returned, but will be retained for subsequent read operations. */ -int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) { -int i, l, ret, rlo; -unsigned char * b; -struct tagbstring x; -struct charField cf; - - if (s == NULL || s->buff == NULL || r == NULL || term == NULL || - term->data == NULL || r->mlen <= 0 || r->slen < 0 || - r->mlen < r->slen) return BSTR_ERR; - if (term->slen == 1) return bsreadlna (r, s, term->data[0]); - if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR; - - l = s->buff->slen; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *) s->buff->data; - x.data = b; - - /* First check if the current buffer holds the terminator */ - b[l] = term->data[0]; /* Set sentinel */ - for (i=0; !testInCharField (&cf, b[i]); i++) ; - if (i < l) { - x.slen = i + 1; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR; - - /* Perform 
direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) { - if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *) (r->data + r->slen); - l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm); - if (l <= 0) { - r->data[r->slen] = (unsigned char) '\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - - b[l] = term->data[0]; /* Set sentinel */ - for (i=0; !testInCharField (&cf, b[i]); i++) ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy (s->buff->data, b + i, l - i); - r->data[r->slen] = (unsigned char) '\0'; - return BSTR_OK; +int bsreadlnsa(bstring r, struct bStream * s, const_bstring term) +{ + int i, l, ret, rlo; + unsigned char * b; + struct tagbstring x; + struct charField cf; + + if (s == NULL || s->buff == NULL || r == NULL || term == NULL || + term->data == NULL || r->mlen <= 0 || r->slen < 0 || + r->mlen < r->slen) return BSTR_ERR; + if (term->slen == 1) return bsreadlna(r, s, term->data[0]); + if (term->slen < 1 || buildCharField(&cf, term)) return BSTR_ERR; + + l = s->buff->slen; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *)s->buff->data; + x.data = b; + + /* First check if the current buffer holds the terminator */ + b[l] = term->data[0]; /* Set sentinel */ + for (i = 0; !testInCharField(&cf, b[i]); i++) + ; + if (i < l) + { + x.slen = i + 1; + ret = bconcat(r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete(s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat(r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) + { + 
if (BSTR_OK != balloc(r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *)(r->data + r->slen); + l = (int)s->readFnPtr(b, 1, s->maxBuffSz, s->parm); + if (l <= 0) + { + r->data[r->slen] = (unsigned char)'\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + + b[l] = term->data[0]; /* Set sentinel */ + for (i = 0; !testInCharField(&cf, b[i]); i++) + ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy(s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char)'\0'; + return BSTR_OK; } /* int bsreada (bstring r, struct bStream * s, int n) * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be * retained for subsequent read operations. This function will not read * additional characters from the core stream beyond virtual stream pointer. 
*/ -int bsreada (bstring r, struct bStream * s, int n) { -int l, ret, orslen; -char * b; -struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; - - n += r->slen; - if (n <= 0) return BSTR_ERR; - - l = s->buff->slen; - - orslen = r->slen; - - if (0 == l) { - if (s->isEOF) return BSTR_ERR; - if (r->mlen > n) { - l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm); - if (0 >= l || l > n - r->slen) { - s->isEOF = 1; - return BSTR_ERR; - } - r->slen += l; - r->data[r->slen] = (unsigned char) '\0'; - return 0; - } - } - - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *) s->buff->data; - x.data = (unsigned char *) b; - - do { - if (l + r->slen >= n) { - x.slen = n - r->slen; - ret = bconcat (r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen); - return BSTR_ERR & -(r->slen == orslen); - } - - x.slen = l; - if (BSTR_OK != bconcat (r, &x)) break; - - l = n - r->slen; - if (l > s->maxBuffSz) l = s->maxBuffSz; - - l = (int) s->readFnPtr (b, 1, l, s->parm); - - } while (l > 0); - if (l < 0) l = 0; - if (l == 0) s->isEOF = 1; - s->buff->slen = l; - return BSTR_ERR & -(r->slen == orslen); +int bsreada(bstring r, struct bStream * s, int n) +{ + int l, ret, orslen; + char * b; + struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; + + n += r->slen; + if (n <= 0) return BSTR_ERR; + + l = s->buff->slen; + + orslen = r->slen; + + if (0 == l) + { + if (s->isEOF) return BSTR_ERR; + if (r->mlen > n) + { + l = (int)s->readFnPtr(r->data + r->slen, 1, n - r->slen, s->parm); + if (0 >= l || l > n - r->slen) + { + s->isEOF = 1; + return BSTR_ERR; + } + r->slen += l; + r->data[r->slen] = (unsigned char)'\0'; + return 0; + } + } + + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char 
*)s->buff->data; + x.data = (unsigned char *)b; + + do + { + if (l + r->slen >= n) + { + x.slen = n - r->slen; + ret = bconcat(r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete(s->buff, 0, x.slen); + return BSTR_ERR & -(r->slen == orslen); + } + + x.slen = l; + if (BSTR_OK != bconcat(r, &x)) break; + + l = n - r->slen; + if (l > s->maxBuffSz) l = s->maxBuffSz; + + l = (int)s->readFnPtr(b, 1, l, s->parm); + } + while (l > 0); + if (l < 0) l = 0; + if (l == 0) s->isEOF = 1; + s->buff->slen = l; + return BSTR_ERR & -(r->slen == orslen); } /* int bsreadln (bstring r, struct bStream * s, char terminator) * * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. This - * function may read additional characters from the core stream that are not + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not * returned, but will be retained for subsequent read operations. */ -int bsreadln (bstring r, struct bStream * s, char terminator) { - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) - return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlna (r, s, terminator); +int bsreadln(bstring r, struct bStream * s, char terminator) +{ + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) + return BSTR_ERR; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlna(r, s, terminator); } /* int bsreadlns (bstring r, struct bStream * s, bstring term) * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. 
- * This function may read additional characters from the core stream that + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. + * This function may read additional characters from the core stream that * are not returned, but will be retained for subsequent read operations. */ -int bsreadlns (bstring r, struct bStream * s, const_bstring term) { - if (s == NULL || s->buff == NULL || r == NULL || term == NULL - || term->data == NULL || r->mlen <= 0) return BSTR_ERR; - if (term->slen == 1) return bsreadln (r, s, term->data[0]); - if (term->slen < 1) return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlnsa (r, s, term); +int bsreadlns(bstring r, struct bStream * s, const_bstring term) +{ + if (s == NULL || s->buff == NULL || r == NULL || term == NULL + || term->data == NULL || r->mlen <= 0) return BSTR_ERR; + if (term->slen == 1) return bsreadln(r, s, term->data[0]); + if (term->slen < 1) return BSTR_ERR; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlnsa(r, s, term); } /* int bsread (bstring r, struct bStream * s, int n) * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be * retained for subsequent read operations. This function will not read * additional characters from the core stream beyond virtual stream pointer. 
*/ -int bsread (bstring r, struct bStream * s, int n) { - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || n <= 0) return BSTR_ERR; - if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreada (r, s, n); +int bsread(bstring r, struct bStream * s, int n) +{ + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || n <= 0) return BSTR_ERR; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreada(r, s, n); } /* int bsunread (struct bStream * s, const_bstring b) * - * Insert a bstring into the bStream at the current position. These - * characters will be read prior to those that actually come from the core + * Insert a bstring into the bStream at the current position. These + * characters will be read prior to those that actually come from the core * stream. */ -int bsunread (struct bStream * s, const_bstring b) { - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return binsert (s->buff, 0, b, (unsigned char) '?'); +int bsunread(struct bStream * s, const_bstring b) +{ + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return binsert(s->buff, 0, b, (unsigned char)'?'); } /* int bspeek (bstring r, const struct bStream * s) * - * Return the currently buffered characters from the bStream that will be + * Return the currently buffered characters from the bStream that will be * read prior to reads from the core stream. */ -int bspeek (bstring r, const struct bStream * s) { - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return bassign (r, s->buff); +int bspeek(bstring r, const struct bStream * s) +{ + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return bassign(r, s->buff); } /* bstring bjoin (const struct bstrList * bl, const_bstring sep); * - * Join the entries of a bstrList into one bstring by sequentially - * concatenating them with the sep string in between. 
If there is an error + * Join the entries of a bstrList into one bstring by sequentially + * concatenating them with the sep string in between. If there is an error * NULL is returned, otherwise a bstring with the correct result is returned. */ -bstring bjoin (const struct bstrList * bl, const_bstring sep) { -bstring b; -int i, c, v; - - if (bl == NULL || bl->qty < 0) return NULL; - if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; - - for (i = 0, c = 1; i < bl->qty; i++) { - v = bl->entry[i]->slen; - if (v < 0) return NULL; /* Invalid input */ - c += v; - if (c < 0) return NULL; /* Wrap around ?? */ - } - - if (sep != NULL) c += (bl->qty - 1) * sep->slen; - - b = (bstring) bstr__alloc (sizeof (struct tagbstring)); - if (NULL == b) return NULL; /* Out of memory */ - b->data = (unsigned char *) bstr__alloc (c); - if (b->data == NULL) { - bstr__free (b); - return NULL; - } - - b->mlen = c; - b->slen = c-1; - - for (i = 0, c = 0; i < bl->qty; i++) { - if (i > 0 && sep != NULL) { - bstr__memcpy (b->data + c, sep->data, sep->slen); - c += sep->slen; - } - v = bl->entry[i]->slen; - bstr__memcpy (b->data + c, bl->entry[i]->data, v); - c += v; - } - b->data[c] = (unsigned char) '\0'; - return b; +bstring bjoin(const struct bstrList * bl, const_bstring sep) +{ + bstring b; + int i, c, v; + + if (bl == NULL || bl->qty < 0) return NULL; + if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; + + for (i = 0, c = 1; i < bl->qty; i++) + { + v = bl->entry[i]->slen; + if (v < 0) return NULL; /* Invalid input */ + c += v; + if (c < 0) return NULL; /* Wrap around ?? 
*/ + } + + if (sep != NULL) c += (bl->qty - 1) * sep->slen; + + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (NULL == b) return NULL; /* Out of memory */ + b->data = (unsigned char *)bstr__alloc(c); + if (b->data == NULL) + { + bstr__free(b); + return NULL; + } + + b->mlen = c; + b->slen = c - 1; + + for (i = 0, c = 0; i < bl->qty; i++) + { + if (i > 0 && sep != NULL) + { + bstr__memcpy(b->data + c, sep->data, sep->slen); + c += sep->slen; + } + v = bl->entry[i]->slen; + bstr__memcpy(b->data + c, bl->entry[i]->data, v); + c += v; + } + b->data[c] = (unsigned char)'\0'; + return b; } #define BSSSC_BUFF_LEN (256) -/* int bssplitscb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) +/* int bssplitscb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by any of the characters in splitStr. An empty splitStr causes + * Iterate the set of disjoint sequential substrings read from a stream + * divided by any of the characters in splitStr. An empty splitStr causes * the whole stream to be iterated once. * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split * character. The cb function can act on the stream by causing the bStream * pointer to move, and bssplitscb will continue by starting the next split * at the position of the pointer after the return from cb. * * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an + * return with a negative value, otherwise bssplitscb will continue in an * undefined manner. 
*/ -int bssplitscb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { -struct charField chrs; -bstring buff; -int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; - - if (splitStr->slen == 0) { - while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ; - if ((ret = cb (parm, 0, buff)) > 0) - ret = 0; - } else { - buildCharField (&chrs, splitStr); - ret = p = i = 0; - for (;;) { - if (i >= buff->slen) { - bsreada (buff, s, BSSSC_BUFF_LEN); - if (i >= buff->slen) { - if (0 < (ret = cb (parm, p, buff))) ret = 0; - break; - } - } - if (testInCharField (&chrs, buff->data[i])) { - struct tagbstring t; - unsigned char c; - - blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1)); - if ((ret = bsunread (s, &t)) < 0) break; - buff->slen = i; - c = buff->data[i]; - buff->data[i] = (unsigned char) '\0'; - if ((ret = cb (parm, p, buff)) < 0) break; - buff->data[i] = c; - buff->slen = 0; - p += i + 1; - i = -1; - } - i++; - } - } - - bdestroy (buff); - return ret; -} - -/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by the entire substring splitStr. 
An empty splitStr causes +int bssplitscb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm) +{ + struct charField chrs; + bstring buff; + int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (NULL == (buff = bfromcstr(""))) return BSTR_ERR; + + if (splitStr->slen == 0) + { + while (bsreada(buff, s, BSSSC_BUFF_LEN) >= 0) + ; + if ((ret = cb(parm, 0, buff)) > 0) + ret = 0; + } + else + { + buildCharField(&chrs, splitStr); + ret = p = i = 0; + for (;;) + { + if (i >= buff->slen) + { + bsreada(buff, s, BSSSC_BUFF_LEN); + if (i >= buff->slen) + { + if (0 < (ret = cb(parm, p, buff))) ret = 0; + break; + } + } + if (testInCharField(&chrs, buff->data[i])) + { + struct tagbstring t; + unsigned char c; + + blk2tbstr(t, buff->data + i + 1, buff->slen - (i + 1)); + if ((ret = bsunread(s, &t)) < 0) break; + buff->slen = i; + c = buff->data[i]; + buff->data[i] = (unsigned char)'\0'; + if ((ret = cb(parm, p, buff)) < 0) break; + buff->data[i] = c; + buff->slen = 0; + p += i + 1; + i = -1; + } + i++; + } + } + + bdestroy(buff); + return ret; +} + +/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by the entire substring splitStr. An empty splitStr causes * each character of the stream to be iterated. * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split * character. 
The cb function can act on the stream by causing the bStream * pointer to move, and bssplitscb will continue by starting the next split * at the position of the pointer after the return from cb. * * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an + * return with a negative value, otherwise bssplitscb will continue in an * undefined manner. */ -int bssplitstrcb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) { -bstring buff; -int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm); - - if (NULL == (buff = bfromcstr (""))) return BSTR_ERR; - - if (splitStr->slen == 0) { - for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) { - if ((ret = cb (parm, 0, buff)) < 0) { - bdestroy (buff); - return ret; - } - buff->slen = 0; - } - return BSTR_OK; - } else { - ret = p = i = 0; - for (i=p=0;;) { - if ((ret = binstr (buff, 0, splitStr)) >= 0) { - struct tagbstring t; - blk2tbstr (t, buff->data, ret); - i = ret + splitStr->slen; - if ((ret = cb (parm, p, &t)) < 0) break; - p += i; - bdelete (buff, 0, i); - } else { - bsreada (buff, s, BSSSC_BUFF_LEN); - if (bseof (s)) { - if ((ret = cb (parm, p, buff)) > 0) ret = 0; - break; - } - } - } - } - - bdestroy (buff); - return ret; +int bssplitstrcb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm) +{ + bstring buff; + int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (splitStr->slen == 1) return bssplitscb(s, splitStr, cb, parm); + + if (NULL == (buff = bfromcstr(""))) return BSTR_ERR; + + if (splitStr->slen == 0) + { + for (i = 0; bsreada(buff, s, BSSSC_BUFF_LEN) >= 0; 
i++) + { + if ((ret = cb(parm, 0, buff)) < 0) + { + bdestroy(buff); + return ret; + } + buff->slen = 0; + } + return BSTR_OK; + } + else + { + ret = p = i = 0; + for (i = p = 0;;) + { + if ((ret = binstr(buff, 0, splitStr)) >= 0) + { + struct tagbstring t; + blk2tbstr(t, buff->data, ret); + i = ret + splitStr->slen; + if ((ret = cb(parm, p, &t)) < 0) break; + p += i; + bdelete(buff, 0, i); + } + else + { + bsreada(buff, s, BSSSC_BUFF_LEN); + if (bseof(s)) + { + if ((ret = cb(parm, p, buff)) > 0) ret = 0; + break; + } + } + } + } + + bdestroy(buff); + return ret; } /* int bstrListCreate (void) * * Create a bstrList. */ -struct bstrList * bstrListCreate (void) { -struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (sl) { - sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring)); - if (!sl->entry) { - bstr__free (sl); - sl = NULL; - } else { - sl->qty = 0; - sl->mlen = 1; - } - } - return sl; +struct bstrList * bstrListCreate(void) +{ + struct bstrList * sl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (sl) + { + sl->entry = (bstring *)bstr__alloc(1 * sizeof(bstring)); + if (!sl->entry) + { + bstr__free(sl); + sl = NULL; + } + else + { + sl->qty = 0; + sl->mlen = 1; + } + } + return sl; } /* int bstrListDestroy (struct bstrList * sl) * * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. 
*/ -int bstrListDestroy (struct bstrList * sl) { -int i; - if (sl == NULL || sl->qty < 0) return BSTR_ERR; - for (i=0; i < sl->qty; i++) { - if (sl->entry[i]) { - bdestroy (sl->entry[i]); - sl->entry[i] = NULL; - } - } - sl->qty = -1; - sl->mlen = -1; - bstr__free (sl->entry); - sl->entry = NULL; - bstr__free (sl); - return BSTR_OK; +int bstrListDestroy(struct bstrList * sl) +{ + int i; + if (sl == NULL || sl->qty < 0) return BSTR_ERR; + for (i = 0; i < sl->qty; i++) + { + if (sl->entry[i]) + { + bdestroy(sl->entry[i]); + sl->entry[i] = NULL; + } + } + sl->qty = -1; + sl->mlen = -1; + bstr__free(sl->entry); + sl->entry = NULL; + bstr__free(sl); + return BSTR_OK; } /* int bstrListAlloc (struct bstrList * sl, int msz) @@ -2457,25 +2715,27 @@ int i; * Ensure that there is memory for at least msz number of entries for the * list. */ -int bstrListAlloc (struct bstrList * sl, int msz) { -bstring * l; -int smsz; -size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (sl->mlen >= msz) return BSTR_OK; - smsz = snapUpSize (msz); - nsz = ((size_t) smsz) * sizeof (bstring); - if (nsz < (size_t) smsz) return BSTR_ERR; - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) { - smsz = msz; - nsz = ((size_t) smsz) * sizeof (bstring); - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) return BSTR_ERR; - } - sl->mlen = smsz; - sl->entry = l; - return BSTR_OK; +int bstrListAlloc(struct bstrList * sl, int msz) +{ + bstring * l; + int smsz; + size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (sl->mlen >= msz) return BSTR_OK; + smsz = snapUpSize(msz); + nsz = ((size_t)smsz) * sizeof(bstring); + if (nsz < (size_t)smsz) return BSTR_ERR; + l = (bstring *)bstr__realloc(sl->entry, nsz); + if (!l) + { + smsz = msz; + nsz = ((size_t)smsz) * sizeof(bstring); + l = (bstring *)bstr__realloc(sl->entry, nsz); + if (!l) return BSTR_ERR; + } + 
sl->mlen = smsz; + sl->entry = l; + return BSTR_OK; } /* int bstrListAllocMin (struct bstrList * sl, int msz) @@ -2483,196 +2743,218 @@ size_t nsz; * Try to allocate the minimum amount of memory for the list to include at * least msz entries or sl->qty whichever is greater. */ -int bstrListAllocMin (struct bstrList * sl, int msz) { -bstring * l; -size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (msz < sl->qty) msz = sl->qty; - if (sl->mlen == msz) return BSTR_OK; - nsz = ((size_t) msz) * sizeof (bstring); - if (nsz < (size_t) msz) return BSTR_ERR; - l = (bstring *) bstr__realloc (sl->entry, nsz); - if (!l) return BSTR_ERR; - sl->mlen = msz; - sl->entry = l; - return BSTR_OK; +int bstrListAllocMin(struct bstrList * sl, int msz) +{ + bstring * l; + size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (msz < sl->qty) msz = sl->qty; + if (sl->mlen == msz) return BSTR_OK; + nsz = ((size_t)msz) * sizeof(bstring); + if (nsz < (size_t)msz) return BSTR_ERR; + l = (bstring *)bstr__realloc(sl->entry, nsz); + if (!l) return BSTR_ERR; + sl->mlen = msz; + sl->entry = l; + return BSTR_OK; } /* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) + * int (* cb) (void * parm, int ofs, int len), void * parm) * * Iterate the set of disjoint sequential substrings over str divided by the * character in splitChar. * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitcb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. 
However, if the - * cb function destroys str, then it *must* return with a negative value, + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitcb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, * otherwise bsplitcb will continue in an undefined manner. */ -int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -int i, p, ret; +int bsplitcb(const_bstring str, unsigned char splitChar, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm) +{ + int i, p, ret; - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) - return BSTR_ERR; + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) + return BSTR_ERR; - p = pos; - do { - for (i=p; i < str->slen; i++) { - if (str->data[i] == splitChar) break; - } - if ((ret = cb (parm, p, i - p)) < 0) return ret; - p = i + 1; - } while (p <= str->slen); - return BSTR_OK; + p = pos; + do + { + for (i = p; i < str->slen; i++) + { + if (str->data[i] == splitChar) break; + } + if ((ret = cb(parm, p, i - p)) < 0) return ret; + p = i + 1; + } + while (p <= str->slen); + return BSTR_OK; } /* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) + * int (* cb) (void * parm, int ofs, int len), void * parm) * - * Iterate the set of disjoint sequential substrings over str divided by any + * Iterate the set of disjoint sequential substrings over str divided by any * of the characters in splitStr. An empty splitStr causes the whole str to * be iterated once. 
* - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitscb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitscb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, * otherwise bsplitscb will continue in an undefined manner. 
*/ -int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -struct charField chrs; -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - if (splitStr->slen == 0) { - if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0; - return ret; - } - - if (splitStr->slen == 1) - return bsplitcb (str, splitStr->data[0], pos, cb, parm); - - buildCharField (&chrs, splitStr); - - p = pos; - do { - for (i=p; i < str->slen; i++) { - if (testInCharField (&chrs, str->data[i])) break; - } - if ((ret = cb (parm, p, i - p)) < 0) return ret; - p = i + 1; - } while (p <= str->slen); - return BSTR_OK; +int bsplitscb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm) +{ + struct charField chrs; + int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + if (splitStr->slen == 0) + { + if ((ret = cb(parm, 0, str->slen)) > 0) ret = 0; + return ret; + } + + if (splitStr->slen == 1) + return bsplitcb(str, splitStr->data[0], pos, cb, parm); + + buildCharField(&chrs, splitStr); + + p = pos; + do + { + for (i = p; i < str->slen; i++) + { + if (testInCharField(&chrs, str->data[i])) break; + } + if ((ret = cb(parm, p, i - p)) < 0) return ret; + p = i + 1; + } + while (p <= str->slen); + return BSTR_OK; } /* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) + * int (* cb) (void * parm, int ofs, int len), void * parm) * - * Iterate the set of disjoint sequential substrings over str divided by the - * substring splitStr. An empty splitStr causes the whole str to be + * Iterate the set of disjoint sequential substrings over str divided by the + * substring splitStr. An empty splitStr causes the whole str to be * iterated once. 
* - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitstrcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitstrcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, * otherwise bsplitscb will continue in an undefined manner. 
*/ -int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm) { -int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (0 == splitStr->slen) { - for (i=pos; i < str->slen; i++) { - if ((ret = cb (parm, i, 1)) < 0) return ret; - } - return BSTR_OK; - } - - if (splitStr->slen == 1) - return bsplitcb (str, splitStr->data[0], pos, cb, parm); - - for (i=p=pos; i <= str->slen - splitStr->slen; i++) { - if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) { - if ((ret = cb (parm, p, i - p)) < 0) return ret; - i += splitStr->slen; - p = i; - } - } - if ((ret = cb (parm, p, str->slen - p)) < 0) return ret; - return BSTR_OK; -} - -struct genBstrList { - bstring b; - struct bstrList * bl; +int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm) +{ + int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (0 == splitStr->slen) + { + for (i = pos; i < str->slen; i++) + { + if ((ret = cb(parm, i, 1)) < 0) return ret; + } + return BSTR_OK; + } + + if (splitStr->slen == 1) + return bsplitcb(str, splitStr->data[0], pos, cb, parm); + + for (i = p = pos; i <= str->slen - splitStr->slen; i++) + { + if (0 == bstr__memcmp(splitStr->data, str->data + i, splitStr->slen)) + { + if ((ret = cb(parm, p, i - p)) < 0) return ret; + i += splitStr->slen; + p = i; + } + } + if ((ret = cb(parm, p, str->slen - p)) < 0) return ret; + return BSTR_OK; +} + +struct genBstrList +{ + bstring b; + struct bstrList * bl; }; -static int bscb (void * parm, int ofs, int len) { -struct genBstrList * g = (struct genBstrList *) parm; - if (g->bl->qty >= g->bl->mlen) { - int mlen = g->bl->mlen * 2; - bstring * tbl; +static int bscb(void * parm, int ofs, int len) +{ + struct 
genBstrList * g = (struct genBstrList *)parm; + if (g->bl->qty >= g->bl->mlen) + { + int mlen = g->bl->mlen * 2; + bstring * tbl; - while (g->bl->qty >= mlen) { - if (mlen < g->bl->mlen) return BSTR_ERR; - mlen += mlen; - } + while (g->bl->qty >= mlen) + { + if (mlen < g->bl->mlen) return BSTR_ERR; + mlen += mlen; + } - tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen); - if (tbl == NULL) return BSTR_ERR; + tbl = (bstring *)bstr__realloc(g->bl->entry, sizeof(bstring) * mlen); + if (tbl == NULL) return BSTR_ERR; - g->bl->entry = tbl; - g->bl->mlen = mlen; - } + g->bl->entry = tbl; + g->bl->mlen = mlen; + } - g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len); - g->bl->qty++; - return BSTR_OK; + g->bl->entry[g->bl->qty] = bmidstr(g->b, ofs, len); + g->bl->qty++; + return BSTR_OK; } /* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) * * Create an array of sequential substrings from str divided by the character - * splitChar. - */ -struct bstrList * bsplit (const_bstring str, unsigned char splitChar) { -struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - - g.b = (bstring) str; - g.bl->qty = 0; - if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; + * splitChar. 
+ */ +struct bstrList * bsplit(const_bstring str, unsigned char splitChar) +{ + struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); + if (NULL == g.bl->entry) + { + bstr__free(g.bl); + return NULL; + } + + g.b = (bstring)str; + g.bl->qty = 0; + if (bsplitcb(str, splitChar, 0, bscb, &g) < 0) + { + bstrListDestroy(g.bl); + return NULL; + } + return g.bl; } /* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) @@ -2680,61 +2962,67 @@ struct genBstrList g; * Create an array of sequential substrings from str divided by the entire * substring splitStr. */ -struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) { -struct genBstrList g; +struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr) +{ + struct genBstrList g; - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } + g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); + if (NULL == g.bl->entry) + { + bstr__free(g.bl); + return NULL; + } - g.b = (bstring) str; - g.bl->qty = 0; - if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; + g.b = (bstring)str; + g.bl->qty = 0; + if (bsplitstrcb(str, splitStr, 0, bscb, &g) < 0) + { + bstrListDestroy(g.bl); + return NULL; + } + return g.bl; } /* struct bstrList * 
bsplits (const_bstring str, bstring splitStr) * - * Create an array of sequential substrings from str divided by any of the + * Create an array of sequential substrings from str divided by any of the * characters in splitStr. An empty splitStr causes a single entry bstrList * containing a copy of str to be returned. */ -struct bstrList * bsplits (const_bstring str, const_bstring splitStr) { -struct genBstrList g; - - if ( str == NULL || str->slen < 0 || str->data == NULL || - splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) - return NULL; - - g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring)); - if (NULL == g.bl->entry) { - bstr__free (g.bl); - return NULL; - } - g.b = (bstring) str; - g.bl->qty = 0; - - if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) { - bstrListDestroy (g.bl); - return NULL; - } - return g.bl; -} - -#if defined (__TURBOC__) && !defined (__BORLANDC__) +struct bstrList * bsplits(const_bstring str, const_bstring splitStr) +{ + struct genBstrList g; + + if (str == NULL || str->slen < 0 || str->data == NULL || + splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) + return NULL; + + g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); + if (NULL == g.bl->entry) + { + bstr__free(g.bl); + return NULL; + } + g.b = (bstring)str; + g.bl->qty = 0; + + if (bsplitscb(str, splitStr, 0, bscb, &g) < 0) + { + bstrListDestroy(g.bl); + return NULL; + } + return g.bl; +} + +#if defined(__TURBOC__) && !defined(__BORLANDC__) # ifndef BSTRLIB_NOVSNP # define BSTRLIB_NOVSNP # endif @@ -2742,233 +3030,251 @@ struct genBstrList g; /* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ #if defined(__WATCOMC__) || defined(_MSC_VER) -#define 
exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);} +#define exvsnprintf(r, b, n, f, a) {r = _vsnprintf (b,n,f,a);} #else #ifdef BSTRLIB_NOVSNP -/* This is just a hack. If you are using a system without a vsnprintf, it is +/* This is just a hack. If you are using a system without a vsnprintf, it is not recommended that bformat be used at all. */ -#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;} +#define exvsnprintf(r, b, n, f, a) {vsprintf (b,f,a); r = -1;} #define START_VSNBUFF (256) #else #ifdef __GNUC__ -/* Something is making gcc complain about this prototype not being here, so +/* Something is making gcc complain about this prototype not being here, so I've just gone ahead and put it in. */ //extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); #endif -#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);} +#define exvsnprintf(r, b, n, f, a) {r = vsnprintf (b,n,f,a);} #endif #endif -#if !defined (BSTRLIB_NOVSNP) +#if !defined(BSTRLIB_NOVSNP) #ifndef START_VSNBUFF #define START_VSNBUFF (16) #endif -/* On IRIX vsnprintf returns n-1 when the operation would overflow the target - buffer, WATCOM and MSVC both return -1, while C99 requires that the +/* On IRIX vsnprintf returns n-1 when the operation would overflow the target + buffer, WATCOM and MSVC both return -1, while C99 requires that the returned value be exactly what the length would be if the buffer would be - large enough. This leads to the idea that if the return value is larger + large enough. This leads to the idea that if the return value is larger than n, then changing n to the return value will reduce the number of iterations required. */ /* int bformata (bstring b, const char * fmt, ...) * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it appends the results to - * a bstring which contains what would have been output. 
Note that if there - * is an early generation of a '\0' character, the bstring will be truncated + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it appends the results to + * a bstring which contains what would have been output. Note that if there + * is an early generation of a '\0' character, the bstring will be truncated * to this end point. */ -int bformata (bstring b, const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; +int bformata(bstring b, const char * fmt, ...) +{ + va_list arglist; + bstring buff; + int n, r; - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. 
*/ - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; - } + if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) + { + n = 1; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) return BSTR_ERR; + } - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); + for (;;) + { + va_start(arglist, fmt); + exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); + va_end(arglist); - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); + buff->data[n] = (unsigned char)'\0'; + buff->slen = (int)(strlen)((char *)buff->data); - if (buff->slen < n) break; + if (buff->slen < n) break; - if (r > n) n = r; else n += n; + if (r > n) n = r; else n += n; - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return BSTR_ERR; - } - } + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return BSTR_ERR; + } + } - r = bconcat (b, buff); - bdestroy (buff); - return r; + r = bconcat(b, buff); + bdestroy(buff); + return r; } /* int bassignformat (bstring b, const char * fmt, ...) * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it outputs the results to - * the bstring parameter b. Note that if there is an early generation of a + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it outputs the results to + * the bstring parameter b. Note that if there is an early generation of a * '\0' character, the bstring will be truncated to this end point. */ -int bassignformat (bstring b, const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; +int bassignformat(bstring b, const char * fmt, ...) 
+{ + va_list arglist; + bstring buff; + int n, r; - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR; - } + if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) + { + n = 1; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) return BSTR_ERR; + } - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); + for (;;) + { + va_start(arglist, fmt); + exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); + va_end(arglist); - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); + buff->data[n] = (unsigned char)'\0'; + buff->slen = (int)(strlen)((char *)buff->data); - if (buff->slen < n) break; + if (buff->slen < n) break; - if (r > n) n = r; else n += n; + if (r > n) n = r; else n += n; - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); - return BSTR_ERR; - } - } + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return BSTR_ERR; + } + } - r = bassign (b, buff); - bdestroy (buff); - return r; + r = bassign(b, buff); + bdestroy(buff); + return r; } /* bstring bformat (const char * fmt, ...) 
* * Takes the same parameters as printf (), but rather than outputting results * to stdio, it forms a bstring which contains what would have been output. - * Note that if there is an early generation of a '\0' character, the + * Note that if there is an early generation of a '\0' character, the * bstring will be truncated to this end point. */ -bstring bformat (const char * fmt, ...) { -va_list arglist; -bstring buff; -int n, r; +bstring bformat(const char * fmt, ...) +{ + va_list arglist; + bstring buff; + int n, r; - if (fmt == NULL) return NULL; + if (fmt == NULL) return NULL; - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ - if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) { - n = 1; - if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL; - } + if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) + { + n = 1; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) return NULL; + } - for (;;) { - va_start (arglist, fmt); - exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist); - va_end (arglist); + for (;;) + { + va_start(arglist, fmt); + exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); + va_end(arglist); - buff->data[n] = (unsigned char) '\0'; - buff->slen = (int) (strlen) ((char *) buff->data); + buff->data[n] = (unsigned char)'\0'; + buff->slen = (int)(strlen)((char *)buff->data); - if (buff->slen < n) break; + if (buff->slen < n) break; - if (r > n) n = r; else n += n; + if (r > n) n = r; else n += n; - if (BSTR_OK != balloc (buff, n + 2)) { - bdestroy (buff); 
- return NULL; - } - } + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return NULL; + } + } - return buff; + return buff; } /* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) * - * The bvcformata function formats data under control of the format control - * string fmt and attempts to append the result to b. The fmt parameter is - * the same as that of the printf function. The variable argument list is + * The bvcformata function formats data under control of the format control + * string fmt and attempts to append the result to b. The fmt parameter is + * the same as that of the printf function. The variable argument list is * replaced with arglist, which has been initialized by the va_start macro. - * The size of the appended output is upper bounded by count. If the - * required output exceeds count, the string b is not augmented with any - * contents and a value below BSTR_ERR is returned. If a value below -count - * is returned then it is recommended that the negative of this value be - * used as an update to the count in a subsequent pass. On other errors, - * such as running out of memory, parameter errors or numeric wrap around - * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully + * The size of the appended output is upper bounded by count. If the + * required output exceeds count, the string b is not augmented with any + * contents and a value below BSTR_ERR is returned. If a value below -count + * is returned then it is recommended that the negative of this value be + * used as an update to the count in a subsequent pass. On other errors, + * such as running out of memory, parameter errors or numeric wrap around + * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully * generated and appended to b. * * Note: There is no sanity checking of arglist, and this function is - * destructive of the contents of b from the b->slen point onward. 
If there - * is an early generation of a '\0' character, the bstring will be truncated + * destructive of the contents of b from the b->slen point onward. If there + * is an early generation of a '\0' character, the bstring will be truncated * to this end point. */ -int bvcformata (bstring b, int count, const char * fmt, va_list arg) { -int n, r, l; - - if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL - || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - if (count > (n = b->slen + count) + 2) return BSTR_ERR; - if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR; - - exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg); - - /* Did the operation complete successfully within bounds? */ - for (l = b->slen; l <= n; l++) { - if ('\0' == b->data[l]) { - b->slen = l; - return BSTR_OK; - } - } - - /* Abort, since the buffer was not large enough. The return value - tries to help set what the retry length should be. */ - - b->data[b->slen] = '\0'; - if (r > count + 1) { /* Does r specify a particular target length? */ - n = r; - } else { - n = count + count; /* If not, just double the size of count */ - if (count > n) n = INT_MAX; - } - n = -n; - - if (n > BSTR_ERR-1) n = BSTR_ERR-1; - return n; +int bvcformata(bstring b, int count, const char * fmt, va_list arg) +{ + int n, r, l; + + if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL + || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + if (count > (n = b->slen + count) + 2) return BSTR_ERR; + if (BSTR_OK != balloc(b, n + 2)) return BSTR_ERR; + + exvsnprintf(r, (char *)b->data + b->slen, count + 2, fmt, arg); + + /* Did the operation complete successfully within bounds? */ + for (l = b->slen; l <= n; l++) + { + if ('\0' == b->data[l]) + { + b->slen = l; + return BSTR_OK; + } + } + + /* Abort, since the buffer was not large enough. The return value + tries to help set what the retry length should be. 
*/ + + b->data[b->slen] = '\0'; + if (r > count + 1) /* Does r specify a particular target length? */ + { + n = r; + } + else + { + n = count + count; /* If not, just double the size of count */ + if (count > n) n = INT_MAX; + } + n = -n; + + if (n > BSTR_ERR - 1) n = BSTR_ERR - 1; + return n; } #endif diff --git a/src/cbstring/bstrlib.h b/src/cbstring/bstrlib.h index 24626b9..5ea8454 100644 --- a/src/cbstring/bstrlib.h +++ b/src/cbstring/bstrlib.h @@ -1,14 +1,14 @@ /* * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation * for details on usage and license. */ /* * bstrlib.h * - * This file is the header file for the core module for implementing the + * This file is the header file for the core module for implementing the * bstring functions. 
*/ @@ -24,8 +24,8 @@ extern "C" { #include #include -#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) -# if defined (__TURBOC__) && !defined (__BORLANDC__) +#if !defined(BSTRLIB_VSNP_OK) && !defined(BSTRLIB_NOVSNP) +# if defined(__TURBOC__) && !defined(__BORLANDC__) # define BSTRLIB_NOVSNP # endif #endif @@ -39,121 +39,122 @@ typedef const struct tagbstring * const_bstring; /* Copy functions */ #define cstr2bstr bfromcstr -extern bstring bfromcstr (const char * str); -extern bstring bfromcstralloc (int mlen, const char * str); -extern bstring blk2bstr (const void * blk, int len); -extern char * bstr2cstr (const_bstring s, char z); -extern int bcstrfree (char * s); -extern bstring bstrcpy (const_bstring b1); -extern int bassign (bstring a, const_bstring b); -extern int bassignmidstr (bstring a, const_bstring b, int left, int len); -extern int bassigncstr (bstring a, const char * str); -extern int bassignblk (bstring a, const void * s, int len); +extern bstring bfromcstr(const char * str); +extern bstring bfromcstralloc(int mlen, const char * str); +extern bstring blk2bstr(const void * blk, int len); +extern char * bstr2cstr(const_bstring s, char z); +extern int bcstrfree(char * s); +extern bstring bstrcpy(const_bstring b1); +extern int bassign(bstring a, const_bstring b); +extern int bassignmidstr(bstring a, const_bstring b, int left, int len); +extern int bassigncstr(bstring a, const char * str); +extern int bassignblk(bstring a, const void * s, int len); /* Destroy function */ -extern int bdestroy (bstring b); +extern int bdestroy(bstring b); /* Space allocation hinting functions */ -extern int balloc (bstring s, int len); -extern int ballocmin (bstring b, int len); +extern int balloc(bstring s, int len); +extern int ballocmin(bstring b, int len); /* Substring extraction */ -extern bstring bmidstr (const_bstring b, int left, int len); +extern bstring bmidstr(const_bstring b, int left, int len); /* Various standard manipulations */ -extern int bconcat 
(bstring b0, const_bstring b1); -extern int bconchar (bstring b0, char c); -extern int bcatcstr (bstring b, const char * s); -extern int bcatblk (bstring b, const void * s, int len); -extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); -extern int binsertch (bstring s1, int pos, int len, unsigned char fill); -extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill); -extern int bdelete (bstring s1, int pos, int len); -extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); -extern int btrunc (bstring b, int n); +extern int bconcat(bstring b0, const_bstring b1); +extern int bconchar(bstring b0, char c); +extern int bcatcstr(bstring b, const char * s); +extern int bcatblk(bstring b, const void * s, int len); +extern int binsert(bstring s1, int pos, const_bstring s2, unsigned char fill); +extern int binsertch(bstring s1, int pos, int len, unsigned char fill); +extern int breplace(bstring b1, int pos, int len, const_bstring b2, unsigned char fill); +extern int bdelete(bstring s1, int pos, int len); +extern int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill); +extern int btrunc(bstring b, int n); /* Scan/search functions */ -extern int bstricmp (const_bstring b0, const_bstring b1); -extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); -extern int biseqcaseless (const_bstring b0, const_bstring b1); -extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); -extern int biseq (const_bstring b0, const_bstring b1); -extern int bisstemeqblk (const_bstring b0, const void * blk, int len); -extern int biseqcstr (const_bstring b, const char * s); -extern int biseqcstrcaseless (const_bstring b, const char * s); -extern int bstrcmp (const_bstring b0, const_bstring b1); -extern int bstrncmp (const_bstring b0, const_bstring b1, int n); -extern int binstr (const_bstring s1, int pos, const_bstring s2); -extern int binstrr (const_bstring s1, int pos, 
const_bstring s2); -extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); -extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); -extern int bstrchrp (const_bstring b, int c, int pos); -extern int bstrrchrp (const_bstring b, int c, int pos); -#define bstrchr(b,c) bstrchrp ((b), (c), 0) -#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1) -extern int binchr (const_bstring b0, int pos, const_bstring b1); -extern int binchrr (const_bstring b0, int pos, const_bstring b1); -extern int bninchr (const_bstring b0, int pos, const_bstring b1); -extern int bninchrr (const_bstring b0, int pos, const_bstring b1); -extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos); -extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos); +extern int bstricmp(const_bstring b0, const_bstring b1); +extern int bstrnicmp(const_bstring b0, const_bstring b1, int n); +extern int biseqcaseless(const_bstring b0, const_bstring b1); +extern int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len); +extern int biseq(const_bstring b0, const_bstring b1); +extern int bisstemeqblk(const_bstring b0, const void * blk, int len); +extern int biseqcstr(const_bstring b, const char * s); +extern int biseqcstrcaseless(const_bstring b, const char * s); +extern int bstrcmp(const_bstring b0, const_bstring b1); +extern int bstrncmp(const_bstring b0, const_bstring b1, int n); +extern int binstr(const_bstring s1, int pos, const_bstring s2); +extern int binstrr(const_bstring s1, int pos, const_bstring s2); +extern int binstrcaseless(const_bstring s1, int pos, const_bstring s2); +extern int binstrrcaseless(const_bstring s1, int pos, const_bstring s2); +extern int bstrchrp(const_bstring b, int c, int pos); +extern int bstrrchrp(const_bstring b, int c, int pos); +#define bstrchr(b, c) bstrchrp ((b), (c), 0) +#define bstrrchr(b, c) bstrrchrp ((b), (c), blength(b)-1) +extern int binchr(const_bstring b0, 
int pos, const_bstring b1); +extern int binchrr(const_bstring b0, int pos, const_bstring b1); +extern int bninchr(const_bstring b0, int pos, const_bstring b1); +extern int bninchrr(const_bstring b0, int pos, const_bstring b1); +extern int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos); +extern int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos); /* List of string container functions */ -struct bstrList { +struct bstrList +{ int qty, mlen; bstring * entry; }; -extern struct bstrList * bstrListCreate (void); -extern int bstrListDestroy (struct bstrList * sl); -extern int bstrListAlloc (struct bstrList * sl, int msz); -extern int bstrListAllocMin (struct bstrList * sl, int msz); +extern struct bstrList * bstrListCreate(void); +extern int bstrListDestroy(struct bstrList * sl); +extern int bstrListAlloc(struct bstrList * sl, int msz); +extern int bstrListAllocMin(struct bstrList * sl, int msz); /* String split and join functions */ -extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar); -extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr); -extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr); -extern bstring bjoin (const struct bstrList * bl, const_bstring sep); -extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); -extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); -extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); +extern struct bstrList * bsplit(const_bstring str, unsigned char splitChar); +extern struct bstrList * bsplits(const_bstring str, const_bstring splitStr); +extern struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr); +extern bstring bjoin(const struct bstrList * 
bl, const_bstring sep); +extern int bsplitcb(const_bstring str, unsigned char splitChar, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm); +extern int bsplitscb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm); +extern int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm); /* Miscellaneous functions */ -extern int bpattern (bstring b, int len); -extern int btoupper (bstring b); -extern int btolower (bstring b); -extern int bltrimws (bstring b); -extern int brtrimws (bstring b); -extern int btrimws (bstring b); +extern int bpattern(bstring b, int len); +extern int btoupper(bstring b); +extern int btolower(bstring b); +extern int bltrimws(bstring b); +extern int brtrimws(bstring b); +extern int btrimws(bstring b); /* <*>printf format functions */ -#if !defined (BSTRLIB_NOVSNP) -extern bstring bformat (const char * fmt, ...); -extern int bformata (bstring b, const char * fmt, ...); -extern int bassignformat (bstring b, const char * fmt, ...); -extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); +#if !defined(BSTRLIB_NOVSNP) +extern bstring bformat(const char * fmt, ...); +extern int bformata(bstring b, const char * fmt, ...); +extern int bassignformat(bstring b, const char * fmt, ...); +extern int bvcformata(bstring b, int count, const char * fmt, va_list arglist); #define bvformata(ret, b, fmt, lastarg) { \ bstring bstrtmp_b = (b); \ const char * bstrtmp_fmt = (fmt); \ int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ - for (;;) { \ - va_list bstrtmp_arglist; \ - va_start (bstrtmp_arglist, lastarg); \ - bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ - va_end (bstrtmp_arglist); \ - if (bstrtmp_r >= 0) { /* Everything went ok */ \ - bstrtmp_r = BSTR_OK; \ - break; \ - } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? 
*/ \ - bstrtmp_r = BSTR_ERR; \ - break; \ - } \ - bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ - } \ - ret = bstrtmp_r; \ + for (;;) { \ + va_list bstrtmp_arglist; \ + va_start (bstrtmp_arglist, lastarg); \ + bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ + va_end (bstrtmp_arglist); \ + if (bstrtmp_r >= 0) { /* Everything went ok */ \ + bstrtmp_r = BSTR_OK; \ + break; \ + } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \ + bstrtmp_r = BSTR_ERR; \ + break; \ + } \ + bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ + } \ + ret = bstrtmp_r; \ } #endif @@ -162,34 +163,35 @@ typedef int (*bNgetc) (void *parm); typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); /* Input functions */ -extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); -extern bstring bread (bNread readPtr, void * parm); -extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); -extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); -extern int breada (bstring b, bNread readPtr, void * parm); +extern bstring bgets(bNgetc getcPtr, void * parm, char terminator); +extern bstring bread(bNread readPtr, void * parm); +extern int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int breada(bstring b, bNread readPtr, void * parm); /* Stream functions */ -extern struct bStream * bsopen (bNread readPtr, void * parm); -extern void * bsclose (struct bStream * s); -extern int bsbufflength (struct bStream * s, int sz); -extern int bsreadln (bstring b, struct bStream * s, char terminator); -extern int bsreadlns (bstring r, struct bStream * s, const_bstring term); -extern int bsread (bstring b, struct bStream * s, int n); -extern int bsreadlna (bstring b, struct bStream * s, char terminator); -extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring 
term); -extern int bsreada (bstring b, struct bStream * s, int n); -extern int bsunread (struct bStream * s, const_bstring b); -extern int bspeek (bstring r, const struct bStream * s); -extern int bssplitscb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); -extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); -extern int bseof (const struct bStream * s); +extern struct bStream * bsopen(bNread readPtr, void * parm); +extern void * bsclose(struct bStream * s); +extern int bsbufflength(struct bStream * s, int sz); +extern int bsreadln(bstring b, struct bStream * s, char terminator); +extern int bsreadlns(bstring r, struct bStream * s, const_bstring term); +extern int bsread(bstring b, struct bStream * s, int n); +extern int bsreadlna(bstring b, struct bStream * s, char terminator); +extern int bsreadlnsa(bstring r, struct bStream * s, const_bstring term); +extern int bsreada(bstring b, struct bStream * s, int n); +extern int bsunread(struct bStream * s, const_bstring b); +extern int bspeek(bstring r, const struct bStream * s); +extern int bssplitscb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm); +extern int bssplitstrcb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm); +extern int bseof(const struct bStream * s); -struct tagbstring { - int mlen; - int slen; - unsigned char * data; +struct tagbstring +{ + int mlen; + int slen; + unsigned char * data; }; /* Accessor macros */ @@ -203,7 +205,7 @@ struct tagbstring { #define bchar(b, p) bchare ((b), (p), '\0') /* Static constant string initialization macro */ -#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} +#define bsStaticMlen(q, m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} #if 
defined(_MSC_VER) /* There are many versions of MSVC which emit __LINE__ as a non-constant. */ # define bsStatic(q) bsStaticMlen(q,-32) @@ -217,18 +219,18 @@ struct tagbstring { /* Reference building macros */ #define cstr2tbstr btfromcstr -#define btfromcstr(t,s) { \ +#define btfromcstr(t, s) { \ (t).data = (unsigned char *) (s); \ (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \ (t).mlen = -1; \ } -#define blk2tbstr(t,s,l) { \ +#define blk2tbstr(t, s, l) { \ (t).data = (unsigned char *) (s); \ (t).slen = l; \ (t).mlen = -1; \ } -#define btfromblk(t,s,l) blk2tbstr(t,s,l) -#define bmid2tbstr(t,b,p,l) { \ +#define btfromblk(t, s, l) blk2tbstr(t,s,l) +#define bmid2tbstr(t, b, p, l) { \ const_bstring bstrtmp_s = (b); \ if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ int bstrtmp_left = (p); \ @@ -252,7 +254,7 @@ struct tagbstring { } \ (t).mlen = -__LINE__; \ } -#define btfromblkltrimws(t,s,l) { \ +#define btfromblkltrimws(t, s, l) { \ int bstrtmp_idx = 0, bstrtmp_len = (l); \ unsigned char * bstrtmp_s = (s); \ if (bstrtmp_s && bstrtmp_len >= 0) { \ @@ -264,7 +266,7 @@ struct tagbstring { (t).slen = bstrtmp_len - bstrtmp_idx; \ (t).mlen = -__LINE__; \ } -#define btfromblkrtrimws(t,s,l) { \ +#define btfromblkrtrimws(t, s, l) { \ int bstrtmp_len = (l) - 1; \ unsigned char * bstrtmp_s = (s); \ if (bstrtmp_s && bstrtmp_len >= 0) { \ @@ -276,7 +278,7 @@ struct tagbstring { (t).slen = bstrtmp_len + 1; \ (t).mlen = -__LINE__; \ } -#define btfromblktrimws(t,s,l) { \ +#define btfromblktrimws(t, s, l) { \ int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ unsigned char * bstrtmp_s = (s); \ if (bstrtmp_s && bstrtmp_len >= 0) { \ diff --git a/src/decode.cpp b/src/decode.cpp index cd5f740..a96b235 100644 --- a/src/decode.cpp +++ b/src/decode.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/tokens.h" #include "internal_includes/decode.h" #include "stdlib.h" @@ -31,17 +30,17 @@ enum { FOURCC_SFI0 = FOURCC('S', 'F', 'I', '0') }; // Chunks that we ignore 
typedef struct DXBCContainerHeaderTAG { - unsigned fourcc; - uint32_t unk[4]; - uint32_t one; - uint32_t totalSize; - uint32_t chunkCount; + unsigned fourcc; + uint32_t unk[4]; + uint32_t one; + uint32_t totalSize; + uint32_t chunkCount; } DXBCContainerHeader; typedef struct DXBCChunkHeaderTAG { - unsigned fourcc; - unsigned size; + unsigned fourcc; + unsigned size; } DXBCChunkHeader; #ifdef _DEBUG @@ -52,8 +51,8 @@ static uint64_t instructionID = 0; void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand) { psOperand->eSpecialName = DecodeOperandSpecialName(*pui32NameToken); - switch(psOperand->eSpecialName) - { + switch (psOperand->eSpecialName) + { case NAME_UNDEFINED: { psOperand->specialName = "undefined"; @@ -110,22 +109,22 @@ void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand) break; } //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: //For the triangular domain, there are 4 factors (3 sides, 1 inner) - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: //For the isoline domain, there are 2 factors (detail and density). 
- case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: { psOperand->specialName = "tessFactor"; break; @@ -136,28 +135,26 @@ void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand) break; } } - - return; } // Find the declaration of the texture described by psTextureOperand and // mark it as a shadow type. (e.g. accessed via sampler2DShadow rather than sampler2D) static void MarkTextureAsShadow(ShaderInfo* psShaderInfo, std::vector &declarations, const Operand* psTextureOperand) { - ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); - - for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) - { - if(psDecl->eOpcode == OPCODE_DCL_RESOURCE) - { - if(psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && - psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) - { - psDecl->ui32IsShadowTex = 1; - break; - } - } - } + ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); + + for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) + { + if (psDecl->eOpcode == OPCODE_DCL_RESOURCE) + { + if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && + psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) + { + psDecl->ui32IsShadowTex = 1; + break; + } + } + } } static void MarkTextureSamplerPair(ShaderInfo* psShaderInfo, std::vector & declarations, const Operand* psTextureOperand, const Operand* psSamplerOperand, TextureSamplerPairs& samplers) @@ -165,22 +162,22 @@ static void MarkTextureSamplerPair(ShaderInfo* psShaderInfo, std::vectoreType == OPERAND_TYPE_RESOURCE); ASSERT(psSamplerOperand->eType == OPERAND_TYPE_SAMPLER); - for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) + for (std::vector::iterator psDecl = declarations.begin(); psDecl != 
declarations.end(); psDecl++) { - if(psDecl->eOpcode == OPCODE_DCL_RESOURCE) + if (psDecl->eOpcode == OPCODE_DCL_RESOURCE) { - if(psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && + if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) { // psDecl is the texture resource referenced by psTextureOperand // add psSamplerOperand->ui32RegisterNumber to list of samplers that use this texture - // set::insert returns a pair of which .second tells whether a new element was actually added - if (psDecl->samplersUsed.insert(psSamplerOperand->ui32RegisterNumber).second) + // set::insert returns a pair of which .second tells whether a new element was actually added + if (psDecl->samplersUsed.insert(psSamplerOperand->ui32RegisterNumber).second) { // Record the TEX_with_SMP string in the TextureSamplerPair array that we return to the client std::string combinedname = TextureSamplerName(psShaderInfo, psTextureOperand->ui32RegisterNumber, psSamplerOperand->ui32RegisterNumber, psDecl->ui32IsShadowTex); - samplers.push_back(combinedname); + samplers.push_back(combinedname); } break; } @@ -188,10 +185,10 @@ static void MarkTextureSamplerPair(ShaderInfo* psShaderInfo, std::vectoriWriteMaskEnabled = 1; psOperand->iGSInput = 0; - psOperand->iPSInOut = 0; - psOperand->aeDataType[0] = SVT_FLOAT; - psOperand->aeDataType[1] = SVT_FLOAT; - psOperand->aeDataType[2] = SVT_FLOAT; - psOperand->aeDataType[3] = SVT_FLOAT; + psOperand->iPSInOut = 0; + psOperand->aeDataType[0] = SVT_FLOAT; + psOperand->aeDataType[1] = SVT_FLOAT; + psOperand->aeDataType[2] = SVT_FLOAT; + psOperand->aeDataType[3] = SVT_FLOAT; psOperand->iExtended = DecodeIsOperandExtended(*pui32Tokens); @@ -215,37 +212,36 @@ uint32_t DecodeOperand (const uint32_t *pui32Tokens, Operand* psOperand) psOperand->m_SubOperands[1].reset(); psOperand->m_SubOperands[2].reset(); - psOperand->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; + 
psOperand->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; - /* Check if this instruction is extended. If it is, - * we need to print the information first */ - if (psOperand->iExtended) - { - /* OperandToken1 is the second token */ - ui32NumTokens++; + /* Check if this instruction is extended. If it is, + * we need to print the information first */ + if (psOperand->iExtended) + { + /* OperandToken1 is the second token */ + ui32NumTokens++; - if(DecodeExtendedOperandType(pui32Tokens[1]) == EXTENDED_OPERAND_MODIFIER) + if (DecodeExtendedOperandType(pui32Tokens[1]) == EXTENDED_OPERAND_MODIFIER) { psOperand->eModifier = DecodeExtendedOperandModifier(pui32Tokens[1]); psOperand->eMinPrecision = (OPERAND_MIN_PRECISION)DecodeOperandMinPrecision(pui32Tokens[1]); } + } - } - - psOperand->iIndexDims = DecodeOperandIndexDimension(*pui32Tokens); + psOperand->iIndexDims = DecodeOperandIndexDimension(*pui32Tokens); psOperand->eType = DecodeOperandType(*pui32Tokens); psOperand->ui32RegisterNumber = 0; eNumComponents = DecodeOperandNumComponents(*pui32Tokens); - if (psOperand->eType == OPERAND_TYPE_INPUT_GS_INSTANCE_ID) - { - eNumComponents = OPERAND_1_COMPONENT; - psOperand->aeDataType[0] = SVT_UINT; - } + if (psOperand->eType == OPERAND_TYPE_INPUT_GS_INSTANCE_ID) + { + eNumComponents = OPERAND_1_COMPONENT; + psOperand->aeDataType[0] = SVT_UINT; + } - switch(eNumComponents) + switch (eNumComponents) { case OPERAND_1_COMPONENT: { @@ -264,112 +260,102 @@ uint32_t DecodeOperand (const uint32_t *pui32Tokens, Operand* psOperand) } } - if(psOperand->iWriteMaskEnabled && - psOperand->iNumComponents == 4) + if (psOperand->iWriteMaskEnabled && + psOperand->iNumComponents == 4) { psOperand->eSelMode = DecodeOperand4CompSelMode(*pui32Tokens); - if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) { psOperand->ui32CompMask = DecodeOperand4CompMask(*pui32Tokens); } - else - if(psOperand->eSelMode == 
OPERAND_4_COMPONENT_SWIZZLE_MODE) + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) { psOperand->ui32Swizzle = DecodeOperand4CompSwizzle(*pui32Tokens); - if(psOperand->ui32Swizzle != NO_SWIZZLE) + if (psOperand->ui32Swizzle != NO_SWIZZLE) { psOperand->aui32Swizzle[0] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 0); psOperand->aui32Swizzle[1] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 1); psOperand->aui32Swizzle[2] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 2); psOperand->aui32Swizzle[3] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 3); } - else - { - psOperand->aui32Swizzle[0] = OPERAND_4_COMPONENT_X; - psOperand->aui32Swizzle[1] = OPERAND_4_COMPONENT_Y; - psOperand->aui32Swizzle[2] = OPERAND_4_COMPONENT_Z; - psOperand->aui32Swizzle[3] = OPERAND_4_COMPONENT_W; - } + else + { + psOperand->aui32Swizzle[0] = OPERAND_4_COMPONENT_X; + psOperand->aui32Swizzle[1] = OPERAND_4_COMPONENT_Y; + psOperand->aui32Swizzle[2] = OPERAND_4_COMPONENT_Z; + psOperand->aui32Swizzle[3] = OPERAND_4_COMPONENT_W; + } } - else - if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) { psOperand->aui32Swizzle[0] = DecodeOperand4CompSel1(*pui32Tokens); } } - if(psOperand->eType == OPERAND_TYPE_IMMEDIATE32) + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) { - for(i=0; i< psOperand->iNumComponents; ++i) + for (i = 0; i < psOperand->iNumComponents; ++i) { psOperand->afImmediates[i] = *((float*)(&pui32Tokens[ui32NumTokens])); - ui32NumTokens ++; + ui32NumTokens++; } } - else - if(psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + else if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) { - for(i=0; i< psOperand->iNumComponents; ++i) + for (i = 0; i < psOperand->iNumComponents; ++i) { psOperand->adImmediates[i] = *((double*)(&pui32Tokens[ui32NumTokens])); - ui32NumTokens +=2; + ui32NumTokens += 2; } } - if(psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || - psOperand->eType == 
OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) - { - psOperand->ui32RegisterNumber = -1; - psOperand->ui32CompMask = -1; - } - - // Used only for Metal - if(psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) - { - psOperand->ui32RegisterNumber = 0; - psOperand->ui32CompMask = 1; - } - - for(i=0; i iIndexDims; ++i) + // Used only for Metal + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL || psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) + { + psOperand->ui32RegisterNumber = 0; + psOperand->ui32CompMask = 1; + } + + for (i = 0; i < psOperand->iIndexDims; ++i) { - OPERAND_INDEX_REPRESENTATION eRep = DecodeOperandIndexRepresentation(i ,*pui32Tokens); + OPERAND_INDEX_REPRESENTATION eRep = DecodeOperandIndexRepresentation(i , *pui32Tokens); psOperand->eIndexRep[i] = eRep; psOperand->aui32ArraySizes[i] = 0; psOperand->ui32RegisterNumber = 0; - switch(eRep) + switch (eRep) { case OPERAND_INDEX_IMMEDIATE32: { - psOperand->ui32RegisterNumber = *(pui32Tokens+ui32NumTokens); + psOperand->ui32RegisterNumber = *(pui32Tokens + ui32NumTokens); psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; break; } case OPERAND_INDEX_RELATIVE: { - psOperand->m_SubOperands[i].reset(new Operand()); - DecodeOperand(pui32Tokens+ui32NumTokens, psOperand->m_SubOperands[i].get()); + psOperand->m_SubOperands[i].reset(new Operand()); + DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); - ui32NumTokens++; + ui32NumTokens++; break; } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - psOperand->ui32RegisterNumber = *(pui32Tokens+ui32NumTokens); + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + psOperand->ui32RegisterNumber = *(pui32Tokens + ui32NumTokens); psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; ui32NumTokens++; - psOperand->m_SubOperands[i].reset(new Operand()); - DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); + 
psOperand->m_SubOperands[i].reset(new Operand()); + DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); - ui32NumTokens++; - break; - } + ui32NumTokens++; + break; + } default: { ASSERT(0); @@ -377,20 +363,20 @@ uint32_t DecodeOperand (const uint32_t *pui32Tokens, Operand* psOperand) } } - // Indices should be ints - switch(eRep) + // Indices should be ints + switch (eRep) { case OPERAND_INDEX_IMMEDIATE32: - case OPERAND_INDEX_RELATIVE: - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - int j = 0; - for(; j < psOperand->iNumComponents; j++) - { - psOperand->aeDataType[j] = SVT_INT; - } - break; - } + case OPERAND_INDEX_RELATIVE: + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + int j = 0; + for (; j < psOperand->iNumComponents; j++) + { + psOperand->aeDataType[j] = SVT_INT; + } + break; + } default: { break; @@ -411,16 +397,16 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); uint32_t ui32OperandOffset = 1; - if(eOpcode < NUM_OPCODES && eOpcode >= 0) + if (eOpcode < NUM_OPCODES && eOpcode >= 0) { psShader->aiOpcodeUsed[eOpcode] = 1; } psDecl->eOpcode = eOpcode; - psDecl->ui32IsShadowTex = 0; + psDecl->ui32IsShadowTex = 0; - if(bExtended) + if (bExtended) { ui32OperandOffset = 2; } @@ -431,52 +417,52 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, { psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. 
{ psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_SAMPLER: { - psDecl->ui32NumOperands = 1; - psDecl->value.eSamplerMode = DecodeSamplerMode(*pui32Token); + psDecl->ui32NumOperands = 1; + psDecl->value.eSamplerMode = DecodeSamplerMode(*pui32Token); - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_INDEX_RANGE: { - int regSpace = 0; + int regSpace = 0; psDecl->ui32NumOperands = 1; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); psDecl->value.ui32IndexRange = pui32Token[ui32OperandOffset]; - regSpace = psDecl->asOperands[0].GetRegisterSpace(psShader->eShaderType, psPhase->ePhase); - if(psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT) + regSpace = psDecl->asOperands[0].GetRegisterSpace(psShader->eShaderType, psPhase->ePhase); + if (psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT) { uint32_t i; const uint32_t indexRange = psDecl->value.ui32IndexRange; const uint32_t reg = psDecl->asOperands[0].ui32RegisterNumber; psShader->aIndexedInput[regSpace][reg] = indexRange; - psShader->aIndexedInputParents[regSpace][reg] = reg; + psShader->aIndexedInputParents[regSpace][reg] = reg; //-1 means don't declare this input because it falls in //the range of an already declared array. 
- for(i=reg+1; iaIndexedInput[regSpace][i] = -1; - psShader->aIndexedInputParents[regSpace][i] = reg; + psShader->aIndexedInput[regSpace][i] = -1; + psShader->aIndexedInputParents[regSpace][i] = reg; } } - if(psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT) + if (psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT) { - psShader->aIndexedOutput[regSpace][psDecl->asOperands[0].ui32RegisterNumber] = true;; + psShader->aIndexedOutput[regSpace][psDecl->asOperands[0].ui32RegisterNumber] = true; } break; } @@ -520,17 +506,16 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, case OPCODE_DCL_INPUT: { psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_INPUT_SIV: { psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); - if(psShader->eShaderType == PIXEL_SHADER) + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + if (psShader->eShaderType == PIXEL_SHADER) { psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); - } break; } @@ -539,14 +524,14 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psDecl->ui32NumOperands = 1; psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); Operand* psOperand = &psDecl->asOperands[0]; - DecodeOperand(pui32Token+ui32OperandOffset, psOperand); + DecodeOperand(pui32Token + ui32OperandOffset, psOperand); ShaderInfo::InOutSignature *psSig = NULL; - psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, (const ShaderInfo::InOutSignature**) &psSig); + psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, (const ShaderInfo::InOutSignature**)&psSig); /* UNITY_FRAMEBUFFER_FETCH_AVAILABLE special case mapping for inout color. 
- + In the fragment shader, setting inout var : SV_Target would result to compiler error, unless SV_Target is defined to COLOR semantic for compatibility reasons. Unfortunately, we still need to have a clear distinction between @@ -554,7 +539,7 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, the fact that semantic names are case insensitive and preprocessor macros are not. The resulting HLSL bytecode has semantics in case preserving form, helps code generator to do extra work required for framebuffer fetch - + See also HLSLSupport.cginc */ if (psSig->eSystemValueType == NAME_UNDEFINED && @@ -571,22 +556,22 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, case OPCODE_DCL_INPUT_PS_SGV: { psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INPUT_PS_SIV: + { + psDecl->ui32NumOperands = 1; + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); break; } - case OPCODE_DCL_INPUT_PS_SIV: - { - psDecl->ui32NumOperands = 1; - psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); - DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); - break; - } case OPCODE_DCL_OUTPUT: { psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_OUTPUT_SGV: @@ -596,20 +581,20 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, case OPCODE_DCL_OUTPUT_SIV: { psDecl->ui32NumOperands = 1; - 
DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); break; } case OPCODE_DCL_TEMPS: { - psDecl->value.ui32NumTemps = *(pui32Token+ui32OperandOffset); + psDecl->value.ui32NumTemps = *(pui32Token + ui32OperandOffset); break; } case OPCODE_DCL_INDEXABLE_TEMP: { - psDecl->sIdxTemp.ui32RegIndex = *(pui32Token+ui32OperandOffset); - psDecl->sIdxTemp.ui32RegCount = *(pui32Token+ui32OperandOffset+1); - psDecl->sIdxTemp.ui32RegComponentSize = *(pui32Token+ui32OperandOffset+2); + psDecl->sIdxTemp.ui32RegIndex = *(pui32Token + ui32OperandOffset); + psDecl->sIdxTemp.ui32RegCount = *(pui32Token + ui32OperandOffset + 1); + psDecl->sIdxTemp.ui32RegComponentSize = *(pui32Token + ui32OperandOffset + 2); break; } case OPCODE_DCL_GLOBAL_FLAGS: @@ -625,8 +610,8 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psDecl->ui32TableLength = pui32Token[ui32OperandOffset]; ui32OperandOffset++; - numClassesImplementingThisInterface = DecodeInterfaceTableLength(*(pui32Token+ui32OperandOffset)); - arrayLen = DecodeInterfaceArrayLength(*(pui32Token+ui32OperandOffset)); + numClassesImplementingThisInterface = DecodeInterfaceTableLength(*(pui32Token + ui32OperandOffset)); + arrayLen = DecodeInterfaceArrayLength(*(pui32Token + ui32OperandOffset)); ui32OperandOffset++; @@ -636,9 +621,9 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength; - for(;func < numClassesImplementingThisInterface; ++func) + for (; func < numClassesImplementingThisInterface; ++func) { - uint32_t ui32FuncTable = *(pui32Token+ui32OperandOffset); + uint32_t ui32FuncTable = *(pui32Token + ui32OperandOffset); psShader->aui32FuncTableToFuncPointer[ui32FuncTable] = interfaceID; psShader->funcPointer[interfaceID].aui32FuncTables[func] = 
ui32FuncTable; @@ -650,7 +635,7 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, case OPCODE_DCL_FUNCTION_BODY: { psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_FUNCTION_TABLE: @@ -659,14 +644,13 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, const uint32_t ui32FuncTableID = pui32Token[ui32OperandOffset++]; const uint32_t ui32NumFuncsInTable = pui32Token[ui32OperandOffset++]; - for(ui32Func=0; ui32Funcaui32FuncBodyToFuncTable[ui32FuncBodyID] = ui32FuncTableID; psShader->funcTable[ui32FuncTableID].aui32FuncBodies[ui32Func] = ui32FuncBodyID; - } // OpcodeToken0 is followed by a DWORD that represents the function table @@ -678,55 +662,55 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, break; } - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - { - psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); - break; - } - case OPCODE_HS_DECLS: - { - break; - } - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - { - psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); - break; - } - case OPCODE_HS_JOIN_PHASE: - case OPCODE_HS_FORK_PHASE: - case OPCODE_HS_CONTROL_POINT_PHASE: - { - break; - } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - { + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); + break; + } + case OPCODE_HS_DECLS: + { + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); + break; + } + case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_FORK_PHASE: + case OPCODE_HS_CONTROL_POINT_PHASE: + { + break; + } + case 
OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + { psDecl->value.ui32HullPhaseInstanceCount = pui32Token[1]; - psPhase->ui32InstanceCount = psDecl->value.ui32HullPhaseInstanceCount; - break; - } - case OPCODE_CUSTOMDATA: - { - ui32TokenLength = pui32Token[1]; - { -// int iTupleSrc = 0, iTupleDest = 0; - //const uint32_t ui32ConstCount = pui32Token[1] - 2; - //const uint32_t ui32TupleCount = (ui32ConstCount / 4); - /*CUSTOMDATA_CLASS eClass =*/ DecodeCustomDataClass(pui32Token[0]); - - const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4; - - ICBVec4 const *pVec4Array = (ICBVec4 const *)(void*) (pui32Token + 2); - - /* must be a multiple of 4 */ - ASSERT(((ui32TokenLength - 2) % 4) == 0); - - psDecl->asImmediateConstBuffer.assign(pVec4Array, pVec4Array + ui32NumVec4); - - psDecl->ui32NumOperands = ui32NumVec4; - } - break; - } + psPhase->ui32InstanceCount = psDecl->value.ui32HullPhaseInstanceCount; + break; + } + case OPCODE_CUSTOMDATA: + { + ui32TokenLength = pui32Token[1]; + { +// int iTupleSrc = 0, iTupleDest = 0; + //const uint32_t ui32ConstCount = pui32Token[1] - 2; + //const uint32_t ui32TupleCount = (ui32ConstCount / 4); + /*CUSTOMDATA_CLASS eClass =*/ DecodeCustomDataClass(pui32Token[0]); + + const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4; + + ICBVec4 const *pVec4Array = (ICBVec4 const *)(void*)(pui32Token + 2); + + /* must be a multiple of 4 */ + ASSERT(((ui32TokenLength - 2) % 4) == 0); + + psDecl->asImmediateConstBuffer.assign(pVec4Array, pVec4Array + ui32NumVec4); + + psDecl->ui32NumOperands = ui32NumVec4; + } + break; + } case OPCODE_DCL_HS_MAX_TESSFACTOR: { psDecl->value.fMaxTessFactor = *((float*)&pui32Token[1]); @@ -737,21 +721,21 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psDecl->ui32NumOperands = 2; psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); - 
psDecl->sUAV.bCounter = 0; - psDecl->ui32BufferStride = 4; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); - psDecl->sUAV.Type = DecodeResourceReturnType(0, pui32Token[ui32OperandOffset]); + psDecl->sUAV.bCounter = 0; + psDecl->ui32BufferStride = 4; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + psDecl->sUAV.Type = DecodeResourceReturnType(0, pui32Token[ui32OperandOffset]); break; } case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: { psDecl->ui32NumOperands = 1; psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); - psDecl->sUAV.bCounter = 0; - psDecl->ui32BufferStride = 4; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); - //This should be a RTYPE_UAV_RWBYTEADDRESS buffer. It is memory backed by - //a shader storage buffer whose is unknown at compile time. + psDecl->sUAV.bCounter = 0; + psDecl->ui32BufferStride = 4; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + //This should be a RTYPE_UAV_RWBYTEADDRESS buffer. It is memory backed by + //a shader storage buffer whose is unknown at compile time. 
break; } case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: @@ -761,42 +745,42 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psDecl->ui32NumOperands = 1; psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); - psDecl->sUAV.bCounter = 0; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); - - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); - psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; - - switch(psBinding->eType) - { - case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: - case RTYPE_UAV_APPEND_STRUCTURED: - case RTYPE_UAV_CONSUME_STRUCTURED: - psDecl->sUAV.bCounter = 1; - break; - default: - break; - } + psDecl->sUAV.bCounter = 0; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); + psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; + + switch (psBinding->eType) + { + case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: + case RTYPE_UAV_APPEND_STRUCTURED: + case RTYPE_UAV_CONSUME_STRUCTURED: + psDecl->sUAV.bCounter = 1; + break; + default: + break; + } break; } case OPCODE_DCL_RESOURCE_STRUCTURED: { - const ResourceBinding* psBinding = NULL; - const ConstantBuffer* psBuffer = NULL; + const ResourceBinding* psBinding = NULL; + const ConstantBuffer* psBuffer = NULL; psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - 
psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_TEXTURE, psBinding->ui32BindPoint, &psBuffer); - psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_TEXTURE, psBinding->ui32BindPoint, &psBuffer); + psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; break; } case OPCODE_DCL_RESOURCE_RAW: { psDecl->ui32NumOperands = 1; - psDecl->ui32BufferStride = 4; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + psDecl->ui32BufferStride = 4; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: @@ -804,7 +788,7 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psDecl->ui32NumOperands = 1; psDecl->sUAV.ui32GloballyCoherentAccess = 0; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); psDecl->sTGSM.ui32Stride = pui32Token[ui32OperandOffset++]; psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; @@ -815,24 +799,24 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, psDecl->ui32NumOperands = 1; psDecl->sUAV.ui32GloballyCoherentAccess = 0; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); psDecl->sTGSM.ui32Stride = 4; psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; break; } - case OPCODE_DCL_STREAM: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_GS_INSTANCE_COUNT: - { - psDecl->ui32NumOperands = 0; - psDecl->value.ui32GSInstanceCount = pui32Token[1]; 
- break; - } + case OPCODE_DCL_STREAM: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + psDecl->ui32NumOperands = 0; + psDecl->value.ui32GSInstanceCount = pui32Token[1]; + break; + } default: { //Reached end of declarations @@ -860,53 +844,54 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns psInst->bAddressOffset = 0; - psInst->ui32FirstSrc = 1; - - psInst->iCausedSplit = 0; + psInst->ui32FirstSrc = 1; - if(bExtended) + psInst->iCausedSplit = 0; + + if (bExtended) { - do { + do + { const uint32_t ui32ExtOpcodeToken = pui32Token[ui32OperandOffset]; const EXTENDED_OPCODE_TYPE eExtType = DecodeExtendedOpcodeType(ui32ExtOpcodeToken); - if(eExtType == EXTENDED_OPCODE_SAMPLE_CONTROLS) + if (eExtType == EXTENDED_OPCODE_SAMPLE_CONTROLS) { - struct {int i4:4;} sU; - struct {int i4:4;} sV; - struct {int i4:4;} sW; + struct {int i4 : 4;} sU; + struct {int i4 : 4;} sV; + struct {int i4 : 4;} sW; psInst->bAddressOffset = 1; sU.i4 = DecodeImmediateAddressOffset( - IMMEDIATE_ADDRESS_OFFSET_U, ui32ExtOpcodeToken); - sV.i4 = DecodeImmediateAddressOffset( - IMMEDIATE_ADDRESS_OFFSET_V, ui32ExtOpcodeToken); - sW.i4 = DecodeImmediateAddressOffset( - IMMEDIATE_ADDRESS_OFFSET_W, ui32ExtOpcodeToken); - - psInst->iUAddrOffset = sU.i4; - psInst->iVAddrOffset = sV.i4; - psInst->iWAddrOffset = sW.i4; + IMMEDIATE_ADDRESS_OFFSET_U, ui32ExtOpcodeToken); + sV.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_V, ui32ExtOpcodeToken); + sW.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_W, ui32ExtOpcodeToken); + + psInst->iUAddrOffset = sU.i4; + psInst->iVAddrOffset = sV.i4; + psInst->iWAddrOffset = sW.i4; + } + else if (eExtType == EXTENDED_OPCODE_RESOURCE_RETURN_TYPE) + { + psInst->xType = DecodeExtendedResourceReturnType(0, ui32ExtOpcodeToken); + psInst->yType = DecodeExtendedResourceReturnType(1, ui32ExtOpcodeToken); + 
psInst->zType = DecodeExtendedResourceReturnType(2, ui32ExtOpcodeToken); + psInst->wType = DecodeExtendedResourceReturnType(3, ui32ExtOpcodeToken); + } + else if (eExtType == EXTENDED_OPCODE_RESOURCE_DIM) + { + psInst->eResDim = DecodeExtendedResourceDimension(ui32ExtOpcodeToken); } - else if(eExtType == EXTENDED_OPCODE_RESOURCE_RETURN_TYPE) - { - psInst->xType = DecodeExtendedResourceReturnType(0, ui32ExtOpcodeToken); - psInst->yType = DecodeExtendedResourceReturnType(1, ui32ExtOpcodeToken); - psInst->zType = DecodeExtendedResourceReturnType(2, ui32ExtOpcodeToken); - psInst->wType = DecodeExtendedResourceReturnType(3, ui32ExtOpcodeToken); - } - else if(eExtType == EXTENDED_OPCODE_RESOURCE_DIM) - { - psInst->eResDim = DecodeExtendedResourceDimension(ui32ExtOpcodeToken); - } - - ui32OperandOffset++; - } - while(DecodeIsOpcodeExtended(pui32Token[ui32OperandOffset-1])); + + ui32OperandOffset++; + } + while (DecodeIsOpcodeExtended(pui32Token[ui32OperandOffset - 1])); } - if(eOpcode < NUM_OPCODES && eOpcode >= 0) + if (eOpcode < NUM_OPCODES && eOpcode >= 0) { psShader->aiOpcodeUsed[eOpcode] = 1; } @@ -927,25 +912,25 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_DEFAULT: case OPCODE_ENDSWITCH: case OPCODE_NOP: - case OPCODE_HS_CONTROL_POINT_PHASE: - case OPCODE_HS_FORK_PHASE: - case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_CONTROL_POINT_PHASE: + case OPCODE_HS_FORK_PHASE: + case OPCODE_HS_JOIN_PHASE: { psInst->ui32NumOperands = 0; - psInst->ui32FirstSrc = 0; + psInst->ui32FirstSrc = 0; break; } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - { + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + { psInst->ui32NumOperands = 0; - psInst->ui32FirstSrc = 0; - break; - } + psInst->ui32FirstSrc = 0; + break; + } case OPCODE_SYNC: { psInst->ui32NumOperands = 0; - psInst->ui32FirstSrc = 0; + psInst->ui32FirstSrc = 0; psInst->ui32SyncFlags 
= DecodeSyncFlags(*pui32Token); break; } @@ -959,45 +944,45 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_LABEL: { psInst->ui32NumOperands = 1; - psInst->ui32FirstSrc = 0; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + psInst->ui32FirstSrc = 0; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); break; } case OPCODE_INTERFACE_CALL: { psInst->ui32NumOperands = 1; - psInst->ui32FirstSrc = 0; + psInst->ui32FirstSrc = 0; psInst->ui32FuncIndexWithinInterface = pui32Token[ui32OperandOffset]; ui32OperandOffset++; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + break; } - /* Floating point instruction decodes */ + /* Floating point instruction decodes */ //Instructions with two operands go here case OPCODE_MOV: { psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); break; } - case OPCODE_LOG: - case OPCODE_RSQ: - case OPCODE_EXP: - case OPCODE_SQRT: + case OPCODE_LOG: + case OPCODE_RSQ: + case OPCODE_EXP: + case OPCODE_SQRT: case OPCODE_ROUND_PI: - case OPCODE_ROUND_NI: - case OPCODE_ROUND_Z: - case OPCODE_ROUND_NE: - case OPCODE_FRC: - case OPCODE_FTOU: - case OPCODE_FTOI: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_Z: + case OPCODE_ROUND_NE: + case OPCODE_FRC: + case OPCODE_FTOU: + case OPCODE_FTOI: case OPCODE_UTOF: - case OPCODE_ITOF: + case OPCODE_ITOF: case OPCODE_INEG: case OPCODE_IMM_ATOMIC_ALLOC: case OPCODE_IMM_ATOMIC_CONSUME: @@ -1013,39 +998,39 
@@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_F32TOF16: case OPCODE_F16TOF32: case OPCODE_RCP: - case OPCODE_DERIV_RTX: - case OPCODE_DERIV_RTY: - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTY: + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: case OPCODE_NOT: - case OPCODE_BUFINFO: + case OPCODE_BUFINFO: { psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); break; } //Instructions with three operands go here case OPCODE_SINCOS: - { - psInst->ui32FirstSrc = 2; - //Intentional fall-through - } + { + psInst->ui32FirstSrc = 2; + //Intentional fall-through + } case OPCODE_IMIN: - case OPCODE_UMIN: - case OPCODE_UMAX: - case OPCODE_MIN: - case OPCODE_IMAX: - case OPCODE_MAX: - case OPCODE_MUL: - case OPCODE_DIV: - case OPCODE_ADD: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: + case OPCODE_UMIN: + case OPCODE_UMAX: + case OPCODE_MIN: + case OPCODE_IMAX: + case OPCODE_MAX: + case OPCODE_MUL: + case OPCODE_DIV: + case OPCODE_ADD: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: case OPCODE_NE: case OPCODE_OR: case OPCODE_XOR: @@ -1055,12 +1040,12 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_AND: case OPCODE_GE: case OPCODE_IGE: - case OPCODE_EQ: - case OPCODE_USHR: - case OPCODE_ISHL: - case OPCODE_ISHR: - case OPCODE_LD: - case OPCODE_ILT: + case OPCODE_EQ: + case OPCODE_USHR: + case OPCODE_ISHL: + case OPCODE_ISHR: 
+ case OPCODE_LD: + case OPCODE_ILT: case OPCODE_INE: case OPCODE_UGE: case OPCODE_ULT: @@ -1083,16 +1068,16 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_DDIV: { psInst->ui32NumOperands = 3; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); break; } //Instructions with four operands go here - case OPCODE_MAD: + case OPCODE_MAD: case OPCODE_MOVC: - case OPCODE_IMAD: - case OPCODE_UDIV: + case OPCODE_IMAD: + case OPCODE_UDIV: case OPCODE_LOD: case OPCODE_SAMPLE: case OPCODE_GATHER4: @@ -1111,21 +1096,21 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_IMM_ATOMIC_UMIN: case OPCODE_DMOVC: case OPCODE_DFMA: - case OPCODE_IMUL: - { + case OPCODE_IMUL: + { psInst->ui32NumOperands = 4; - if(eOpcode == OPCODE_IMUL || eOpcode == OPCODE_UDIV) - { - psInst->ui32FirstSrc = 2; - } + if (eOpcode == OPCODE_IMUL || eOpcode == OPCODE_UDIV) + { + psInst->ui32FirstSrc = 2; + } - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + 
ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); break; - } + } case OPCODE_GATHER4_PO: case OPCODE_SAMPLE_L: case OPCODE_BFI: @@ -1133,51 +1118,51 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_IMM_ATOMIC_CMP_EXCH: { psInst->ui32NumOperands = 5; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); break; } case OPCODE_GATHER4_C: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: case OPCODE_SAMPLE_B: - { + { psInst->ui32NumOperands = 5; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); - ui32OperandOffset += 
DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]); - - /* sample_b is not a shadow sampler, others need flagging */ - if (eOpcode != OPCODE_SAMPLE_B) - { - MarkTextureAsShadow(&psShader->sInfo, psPhase->psDecl, &psInst->asOperands[2]); - } + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); + + /* sample_b is not a shadow sampler, others need flagging */ + if (eOpcode != OPCODE_SAMPLE_B) + { + MarkTextureAsShadow(&psShader->sInfo, psPhase->psDecl, &psInst->asOperands[2]); + } break; - } + } case OPCODE_GATHER4_PO_C: case OPCODE_SAMPLE_D: { psInst->ui32NumOperands = 6; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[5]); - - /* sample_d is not a shadow sampler, others need flagging */ - if (eOpcode != OPCODE_SAMPLE_D) - { - MarkTextureAsShadow(&psShader->sInfo, - psPhase->psDecl, - &psInst->asOperands[2]); - } + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + 
ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[5]); + + /* sample_d is not a shadow sampler, others need flagging */ + if (eOpcode != OPCODE_SAMPLE_D) + { + MarkTextureAsShadow(&psShader->sInfo, + psPhase->psDecl, + &psInst->asOperands[2]); + } break; } case OPCODE_IF: @@ -1187,30 +1172,30 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_DISCARD: { psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); - psInst->ui32NumOperands = 1; - psInst->ui32FirstSrc = 0; // no destination registers - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; // no destination registers + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); break; } - case OPCODE_CALLC: + case OPCODE_CALLC: { psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); - psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); break; } - case OPCODE_CUSTOMDATA: - { + case OPCODE_CUSTOMDATA: + { psInst->ui32NumOperands = 0; - ui32TokenLength = pui32Token[1]; - break; - } + ui32TokenLength = pui32Token[1]; + break; + } case OPCODE_EVAL_CENTROID: { psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - 
ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); break; } case OPCODE_EVAL_SAMPLE_INDEX: @@ -1221,46 +1206,46 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_STORE_RAW: { psInst->ui32NumOperands = 3; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); break; } case OPCODE_STORE_STRUCTURED: case OPCODE_LD_STRUCTURED: { psInst->ui32NumOperands = 4; - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); break; } - case OPCODE_RESINFO: + case OPCODE_RESINFO: { psInst->ui32NumOperands = 3; - psInst->eResInfoReturnType = 
DecodeResInfoReturnType(pui32Token[0]); + psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); break; } - case OPCODE_SAMPLE_INFO: - { - psInst->ui32NumOperands = 2; + case OPCODE_SAMPLE_INFO: + { + psInst->ui32NumOperands = 2; - psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); + psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - break; - } + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + break; + } case OPCODE_MSAD: default: { - ASSERT(0); + ASSERT(0); break; } } @@ -1272,307 +1257,307 @@ const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psIns uint32_t bTextureSampleInstruction = 0; switch (eOpcode) { - case OPCODE_GATHER4: - // dest, coords, tex, sampler - ui32TextureRegisterNumber = 2; - ui32SamplerRegisterNumber = 3; - bTextureSampleInstruction = 1; - break; - case OPCODE_GATHER4_PO: - //dest, coords, offset, tex, sampler - ui32TextureRegisterNumber = 3; - ui32SamplerRegisterNumber = 4; - bTextureSampleInstruction = 1; - break; - case OPCODE_GATHER4_C: - //dest, coords, tex, sampler srcReferenceValue - 
ui32TextureRegisterNumber = 2; - ui32SamplerRegisterNumber = 3; - bTextureSampleInstruction = 1; - break; - case OPCODE_GATHER4_PO_C: - //dest, coords, offset, tex, sampler, srcReferenceValue - ui32TextureRegisterNumber = 3; - ui32SamplerRegisterNumber = 4; - bTextureSampleInstruction = 1; - break; - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - case OPCODE_SAMPLE_B: - case OPCODE_SAMPLE_D: - // dest, coords, tex, sampler [, reference] - ui32TextureRegisterNumber = 2; - ui32SamplerRegisterNumber = 3; - bTextureSampleInstruction = 1; - break; - default: - break; + case OPCODE_GATHER4: + // dest, coords, tex, sampler + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_PO: + //dest, coords, offset, tex, sampler + ui32TextureRegisterNumber = 3; + ui32SamplerRegisterNumber = 4; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_C: + //dest, coords, tex, sampler srcReferenceValue + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_PO_C: + //dest, coords, offset, tex, sampler, srcReferenceValue + ui32TextureRegisterNumber = 3; + ui32SamplerRegisterNumber = 4; + bTextureSampleInstruction = 1; + break; + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_D: + // dest, coords, tex, sampler [, reference] + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + default: + break; } - + if (bTextureSampleInstruction) { - MarkTextureSamplerPair(&psShader->sInfo, - psPhase->psDecl, - &psInst->asOperands[ui32TextureRegisterNumber], - &psInst->asOperands[ui32SamplerRegisterNumber], - psShader->textureSamplers); + MarkTextureSamplerPair(&psShader->sInfo, + psPhase->psDecl, + &psInst->asOperands[ui32TextureRegisterNumber], + 
&psInst->asOperands[ui32SamplerRegisterNumber], + psShader->textureSamplers); } } - + return pui32Token + ui32TokenLength; } const uint32_t* DecodeShaderPhase(const uint32_t* pui32Tokens, - Shader* psShader, - const SHADER_PHASE_TYPE ePhaseType, - ShaderPhase *psPhase) + Shader* psShader, + const SHADER_PHASE_TYPE ePhaseType, + ShaderPhase *psPhase) { - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; - psPhase->ePhase = ePhaseType; - //Using ui32ShaderLength as the declaration and instruction count + psPhase->ePhase = ePhaseType; + //Using ui32ShaderLength as the declaration and instruction count //will allocate more than enough memory. Avoids having to //traverse the entire shader just to get the real counts. - psPhase->psDecl.clear(); - psPhase->psDecl.reserve(ui32ShaderLength); + psPhase->psDecl.clear(); + psPhase->psDecl.reserve(ui32ShaderLength); - while(1) //Keep going until we reach the first non-declaration token, or the end of the shader. + while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. 
{ - psPhase->psDecl.push_back(Declaration()); - const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &psPhase->psDecl[psPhase->psDecl.size()-1], psPhase); + psPhase->psDecl.push_back(Declaration()); + const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &psPhase->psDecl[psPhase->psDecl.size() - 1], psPhase); - if(pui32Result) + if (pui32Result) { pui32CurrentToken = pui32Result; - if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) { break; } } else { - psPhase->psDecl.pop_back(); // Remove the last one, it wasn't needed after all + psPhase->psDecl.pop_back(); // Remove the last one, it wasn't needed after all break; } } //Instructions - psPhase->psInst.clear(); - psPhase->psInst.reserve(ui32ShaderLength); - + psPhase->psInst.clear(); + psPhase->psInst.reserve(ui32ShaderLength); + while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength)) { - psPhase->psInst.push_back(Instruction()); - const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, &psPhase->psInst[psPhase->psInst.size()-1], psShader, psPhase); + psPhase->psInst.push_back(Instruction()); + const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, &psPhase->psInst[psPhase->psInst.size() - 1], psShader, psPhase); #ifdef _DEBUG - if(nextInstr == pui32CurrentToken) + if (nextInstr == pui32CurrentToken) { ASSERT(0); break; } #endif - if (psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_FORK_PHASE || psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_JOIN_PHASE) - { - psPhase->psInst.pop_back(); - return pui32CurrentToken; - } + if (psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_FORK_PHASE || psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_JOIN_PHASE) + { + psPhase->psInst.pop_back(); + return pui32CurrentToken; + } pui32CurrentToken = nextInstr; } - return pui32CurrentToken; 
+ return pui32CurrentToken; } const void AllocateHullPhaseArrays(const uint32_t* pui32Tokens, - Shader* psShader) + Shader* psShader) { - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; - uint32_t ui32PhaseCount = 2; // Always the main phase and the HS global declarations - uint32_t i; + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + uint32_t ui32PhaseCount = 2; // Always the main phase and the HS global declarations + uint32_t i; - while(1) //Keep going until we reach the first non-declaration token, or the end of the shader. + while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. { - uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken); - /*const uint32_t bExtended =*/ DecodeIsOpcodeExtended(*pui32CurrentToken); - const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken); + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken); + /*const uint32_t bExtended =*/ DecodeIsOpcodeExtended(*pui32CurrentToken); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken); - if(eOpcode == OPCODE_CUSTOMDATA) - { - ui32TokenLength = pui32CurrentToken[1]; - } + if (eOpcode == OPCODE_CUSTOMDATA) + { + ui32TokenLength = pui32CurrentToken[1]; + } pui32CurrentToken = pui32CurrentToken + ui32TokenLength; - switch (eOpcode) - { - case OPCODE_HS_CONTROL_POINT_PHASE: - case OPCODE_HS_JOIN_PHASE: - case OPCODE_HS_FORK_PHASE: - ui32PhaseCount++; - break; - default: - break; - } + switch (eOpcode) + { + case OPCODE_HS_CONTROL_POINT_PHASE: + case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_FORK_PHASE: + ui32PhaseCount++; + break; + default: + break; + } - if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) { break; } } - psShader->asPhases.clear(); - 
psShader->asPhases.resize(ui32PhaseCount); - for (i = 0; i < ui32PhaseCount; i++) - psShader->asPhases[i].ui32InstanceCount = 1; + psShader->asPhases.clear(); + psShader->asPhases.resize(ui32PhaseCount); + for (i = 0; i < ui32PhaseCount; i++) + psShader->asPhases[i].ui32InstanceCount = 1; } const uint32_t* DecodeHullShader(const uint32_t* pui32Tokens, Shader* psShader) { - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; - ShaderPhase *psPhase; + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + ShaderPhase *psPhase; - AllocateHullPhaseArrays(pui32Tokens, psShader); + AllocateHullPhaseArrays(pui32Tokens, psShader); - // Index 1 is HS_GLOBAL_DECL - psShader->asPhases[1].psInst.clear(); - psShader->asPhases[1].psDecl.clear(); - psShader->asPhases[1].ePhase = HS_GLOBAL_DECL_PHASE; - psShader->asPhases[1].ui32InstanceCount = 1; + // Index 1 is HS_GLOBAL_DECL + psShader->asPhases[1].psInst.clear(); + psShader->asPhases[1].psDecl.clear(); + psShader->asPhases[1].ePhase = HS_GLOBAL_DECL_PHASE; + psShader->asPhases[1].ui32InstanceCount = 1; - // The next phase to parse in. - psPhase = &psShader->asPhases[2]; + // The next phase to parse in. + psPhase = &psShader->asPhases[2]; - //Keep going until we have done all phases or the end of the shader. - while(1) + //Keep going until we have done all phases or the end of the shader. 
+ while (1) { - Declaration newDecl; + Declaration newDecl; const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &newDecl, psPhase); - if(pui32Result) + if (pui32Result) { pui32CurrentToken = pui32Result; - if(newDecl.eOpcode == OPCODE_HS_CONTROL_POINT_PHASE) - { - pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_CTRL_POINT_PHASE, psPhase); - psPhase++; - } - else if(newDecl.eOpcode == OPCODE_HS_FORK_PHASE) - { - pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_FORK_PHASE, psPhase++); - } - else if(newDecl.eOpcode == OPCODE_HS_JOIN_PHASE) - { - pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_JOIN_PHASE, psPhase++); - } - else - { - psShader->asPhases[1].psDecl.push_back(newDecl); - } - - if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) - { - break; - } - } + if (newDecl.eOpcode == OPCODE_HS_CONTROL_POINT_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_CTRL_POINT_PHASE, psPhase); + psPhase++; + } + else if (newDecl.eOpcode == OPCODE_HS_FORK_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_FORK_PHASE, psPhase++); + } + else if (newDecl.eOpcode == OPCODE_HS_JOIN_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_JOIN_PHASE, psPhase++); + } + else + { + psShader->asPhases[1].psDecl.push_back(newDecl); + } + + if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } else { break; } } - return pui32CurrentToken; + return pui32CurrentToken; } void Decode(const uint32_t* pui32Tokens, Shader* psShader) { - const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t* pui32CurrentToken = pui32Tokens; const uint32_t ui32ShaderLength = pui32Tokens[1]; - psShader->ui32MajorVersion = DecodeProgramMajorVersion(*pui32CurrentToken); - psShader->ui32MinorVersion = DecodeProgramMinorVersion(*pui32CurrentToken); - psShader->eShaderType 
= DecodeShaderType(*pui32CurrentToken); + psShader->ui32MajorVersion = DecodeProgramMajorVersion(*pui32CurrentToken); + psShader->ui32MinorVersion = DecodeProgramMinorVersion(*pui32CurrentToken); + psShader->eShaderType = DecodeShaderType(*pui32CurrentToken); - pui32CurrentToken++;//Move to shader length - psShader->ui32ShaderLength = ui32ShaderLength; + pui32CurrentToken++;//Move to shader length + psShader->ui32ShaderLength = ui32ShaderLength; pui32CurrentToken++;//Move to after shader length (usually a declaration) psShader->pui32FirstToken = pui32Tokens; - if(psShader->eShaderType == HULL_SHADER) - { - // DecodeHullShader will allocate psShader->asPhases array. - pui32CurrentToken = DecodeHullShader(pui32CurrentToken, psShader); - return; - } - else - { - psShader->asPhases.clear(); - psShader->asPhases.resize(1); - } - - // Phase 0 is always the main phase - psShader->asPhases[0].ui32InstanceCount = 1; - - DecodeShaderPhase(pui32CurrentToken, psShader, MAIN_PHASE, &psShader->asPhases[0]); + if (psShader->eShaderType == HULL_SHADER) + { + // DecodeHullShader will allocate psShader->asPhases array. 
+ pui32CurrentToken = DecodeHullShader(pui32CurrentToken, psShader); + return; + } + else + { + psShader->asPhases.clear(); + psShader->asPhases.resize(1); + } + + // Phase 0 is always the main phase + psShader->asPhases[0].ui32InstanceCount = 1; + + DecodeShaderPhase(pui32CurrentToken, psShader, MAIN_PHASE, &psShader->asPhases[0]); } Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags) { Shader* psShader; - DXBCContainerHeader* header = (DXBCContainerHeader*)data; - uint32_t i; - uint32_t chunkCount; - uint32_t* chunkOffsets; + DXBCContainerHeader* header = (DXBCContainerHeader*)data; + uint32_t i; + uint32_t chunkCount; + uint32_t* chunkOffsets; ReflectionChunks refChunks; uint32_t* shaderChunk = 0; - if(header->fourcc != FOURCC_DXBC) - { - ASSERT(0 && "Invalid shader type (DX9 shaders no longer supported)!"); - } + if (header->fourcc != FOURCC_DXBC) + { + ASSERT(0 && "Invalid shader type (DX9 shaders no longer supported)!"); + } refChunks.pui32Inputs = NULL; refChunks.pui32Interfaces = NULL; refChunks.pui32Outputs = NULL; refChunks.pui32Resources = NULL; - refChunks.pui32Inputs11 = NULL; - refChunks.pui32Outputs11 = NULL; - refChunks.pui32OutputsWithStreams = NULL; - refChunks.pui32PatchConstants = NULL; - refChunks.pui32PatchConstants11 = NULL; + refChunks.pui32Inputs11 = NULL; + refChunks.pui32Outputs11 = NULL; + refChunks.pui32OutputsWithStreams = NULL; + refChunks.pui32PatchConstants = NULL; + refChunks.pui32PatchConstants11 = NULL; - chunkOffsets = (uint32_t*)(header + 1); + chunkOffsets = (uint32_t*)(header + 1); - chunkCount = header->chunkCount; + chunkCount = header->chunkCount; - for(i = 0; i < chunkCount; ++i) - { - uint32_t offset = chunkOffsets[i]; + for (i = 0; i < chunkCount; ++i) + { + uint32_t offset = chunkOffsets[i]; - DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset); + DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset); - switch(chunk->fourcc) + switch (chunk->fourcc) { case FOURCC_ISGN: { 
refChunks.pui32Inputs = (uint32_t*)(chunk + 1); break; } - case FOURCC_ISG1: - { + case FOURCC_ISG1: + { refChunks.pui32Inputs11 = (uint32_t*)(chunk + 1); break; - } + } case FOURCC_RDEF: { refChunks.pui32Resources = (uint32_t*)(chunk + 1); @@ -1588,51 +1573,51 @@ Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags) refChunks.pui32Outputs = (uint32_t*)(chunk + 1); break; } - case FOURCC_OSG1: + case FOURCC_OSG1: { refChunks.pui32Outputs11 = (uint32_t*)(chunk + 1); break; } - case FOURCC_OSG5: - { + case FOURCC_OSG5: + { refChunks.pui32OutputsWithStreams = (uint32_t*)(chunk + 1); - break; - } + break; + } case FOURCC_SHDR: case FOURCC_SHEX: { shaderChunk = (uint32_t*)(chunk + 1); break; } - case FOURCC_PSGN: - { - refChunks.pui32PatchConstants = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_PSG1: - { - refChunks.pui32PatchConstants11 = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_STAT: - case FOURCC_SFI0: - { - break; // Ignored - } + case FOURCC_PSGN: + { + refChunks.pui32PatchConstants = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_PSG1: + { + refChunks.pui32PatchConstants11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_STAT: + case FOURCC_SFI0: + { + break; // Ignored + } default: { -// ASSERT(0); // Uncomment this to hunt for unknown chunks later on. +// ASSERT(0); // Uncomment this to hunt for unknown chunks later on. 
break; } } - } + } - if(shaderChunk) + if (shaderChunk) { uint32_t ui32MajorVersion; uint32_t ui32MinorVersion; - psShader = new Shader(); + psShader = new Shader(); ui32MajorVersion = DecodeProgramMajorVersion(*shaderChunk); ui32MinorVersion = DecodeProgramMinorVersion(*shaderChunk); @@ -1649,4 +1634,3 @@ Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags) return 0; } - diff --git a/src/internal_includes/ControlFlowGraph.h b/src/internal_includes/ControlFlowGraph.h index e21c4ca..58a75f7 100644 --- a/src/internal_includes/ControlFlowGraph.h +++ b/src/internal_includes/ControlFlowGraph.h @@ -14,141 +14,135 @@ class Operand; namespace HLSLcc { using namespace std; - - namespace ControlFlow - { - class BasicBlock; - - class ControlFlowGraph - { - friend class BasicBlock; - public: - ControlFlowGraph() - : m_BlockMap() - , m_BlockStorage() - {} - typedef std::vector > BasicBlockStorage; - - const BasicBlock &Build(const Instruction *firstInstruction); - - // Only works for instructions that start the basic block - const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const; - - // non-const version for BasicBlock - BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction); - - const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; } - private: +namespace ControlFlow +{ + class BasicBlock; - // Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block - typedef std::map BasicBlockMap; + class ControlFlowGraph + { + friend class BasicBlock; + public: + ControlFlowGraph() + : m_BlockMap() + , m_BlockStorage() + {} - BasicBlockMap m_BlockMap; + typedef std::vector > BasicBlockStorage; - // auto_ptr -type storage for multiple BasicBlocks. 
BlockMap above only has pointers into these - BasicBlockStorage m_BlockStorage; - }; + const BasicBlock &Build(const Instruction *firstInstruction); + // Only works for instructions that start the basic block + const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const; - class BasicBlock - { - friend class ControlFlowGraph; - public: - // A set of register indices, one per each vec4 component per register - typedef std::set RegisterSet; - // The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block - typedef std::set ConnectionSet; + // non-const version for BasicBlock + BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction); - struct Definition - { - Definition(const Instruction *i = NULL, const Operand *o = NULL) - : m_Instruction(i) - , m_Operand(o) - {} + const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; } + private: - Definition(const Definition &a) - : m_Instruction(a.m_Instruction) - , m_Operand(a.m_Operand) - {} + // Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block + typedef std::map BasicBlockMap; - bool operator==(const Definition &a) const - { - if (a.m_Instruction != m_Instruction) - return false; - return a.m_Operand == m_Operand; - } + BasicBlockMap m_BlockMap; - bool operator!=(const Definition &a) const - { - if (a.m_Instruction == m_Instruction) - return false; - return a.m_Operand != m_Operand; - } + // auto_ptr -type storage for multiple BasicBlocks. 
BlockMap above only has pointers into these + BasicBlockStorage m_BlockStorage; + }; - bool operator<(const Definition &a) const - { - if (m_Instruction != a.m_Instruction) - return m_Instruction < a.m_Instruction; - return m_Operand < a.m_Operand; - } - const Instruction *m_Instruction; - const Operand *m_Operand; - }; + class BasicBlock + { + friend class ControlFlowGraph; + public: + // A set of register indices, one per each vec4 component per register + typedef std::set RegisterSet; + // The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block + typedef std::set ConnectionSet; - typedef std::set ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable - typedef std::map ReachableVariables; // A VisibleDefinitionSet for each variable*component. + struct Definition + { + Definition(const Instruction *i = NULL, const Operand *o = NULL) + : m_Instruction(i) + , m_Operand(o) + {} - const Instruction *First() const { return m_First; } - const Instruction *Last() const { return m_Last; } + Definition(const Definition &a) + : m_Instruction(a.m_Instruction) + , m_Operand(a.m_Operand) + {} - const RegisterSet &UEVar() const { return m_UEVar; } - const RegisterSet &VarKill() const { return m_VarKill; } + bool operator==(const Definition &a) const + { + if (a.m_Instruction != m_Instruction) + return false; + return a.m_Operand == m_Operand; + } - const ConnectionSet &Preceding() const { return m_Preceding; } - const ConnectionSet &Succeeding() const { return m_Succeeding; } + bool operator!=(const Definition &a) const + { + if (a.m_Instruction == m_Instruction) + return false; + return a.m_Operand != m_Operand; + } - const ReachableVariables &DEDef() const { return m_DEDef; } - const ReachableVariables &Reachable() const { return m_Reachable; } + bool operator<(const Definition &a) const + { + if (m_Instruction != 
a.m_Instruction) + return m_Instruction < a.m_Instruction; + return m_Operand < a.m_Operand; + } - // Helper function: Do union of 2 ReachableVariables, store result in a. - static void RVarUnion(ReachableVariables &a, const ReachableVariables &b); + const Instruction *m_Instruction; + const Operand *m_Operand; + }; - private: + typedef std::set ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable + typedef std::map ReachableVariables; // A VisibleDefinitionSet for each variable*component. - // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build() - BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead); + const Instruction *First() const { return m_First; } + const Instruction *Last() const { return m_Last; } - // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already. - void Build(); + const RegisterSet &UEVar() const { return m_UEVar; } + const RegisterSet &VarKill() const { return m_VarKill; } - bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed. + const ConnectionSet &Preceding() const { return m_Preceding; } + const ConnectionSet &Succeeding() const { return m_Succeeding; } + const ReachableVariables &DEDef() const { return m_DEDef; } + const ReachableVariables &Reachable() const { return m_Reachable; } - BasicBlock * AddChildBasicBlock(const Instruction *psFirst); + // Helper function: Do union of 2 ReachableVariables, store result in a. + static void RVarUnion(ReachableVariables &a, const ReachableVariables &b); - private: - ControlFlowGraph &m_Graph; // The graph object containing this block + private: - const Instruction *m_First; // The first instruction in the basic block - const Instruction *m_Last; // The last instruction in the basic block. 
Either OPCODE_RET or a branch/jump/loop instruction + // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build() + BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead); - RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block) - RegisterSet m_VarKill; // Set of variables that are defined in this block. + // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already. + void Build(); - ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG - ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG + bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed. - ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set. - ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block. + BasicBlock * AddChildBasicBlock(const Instruction *psFirst); - }; + private: + ControlFlowGraph &m_Graph; // The graph object containing this block + const Instruction *m_First; // The first instruction in the basic block + const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction + RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block) + RegisterSet m_VarKill; // Set of variables that are defined in this block. - }; -}; + ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG + ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG + ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set. 
+ ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block. + }; +} +} diff --git a/src/internal_includes/ControlFlowGraphUtils.h b/src/internal_includes/ControlFlowGraphUtils.h index 0a799b2..69ad807 100644 --- a/src/internal_includes/ControlFlowGraphUtils.h +++ b/src/internal_includes/ControlFlowGraphUtils.h @@ -4,28 +4,27 @@ struct Instruction; namespace HLSLcc { - namespace ControlFlow - { - class Utils - { - public: - // For a given flow-control instruction, find the corresponding jump location: - // If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 - // For ELSE, find same level ENDIF + 1 - // For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 - // For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 - // For ENDLOOP, find previous same-level LOOP + 1 - // For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels - // For CONTINUE/C the previous LOOP + 1 - // Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. - // Note that CASE labels fall through. - // Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. - // If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH - // If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it. 
- static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0); - - static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0); +namespace ControlFlow +{ + class Utils + { + public: + // For a given flow-control instruction, find the corresponding jump location: + // If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 + // For ELSE, find same level ENDIF + 1 + // For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 + // For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 + // For ENDLOOP, find previous same-level LOOP + 1 + // For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels + // For CONTINUE/C the previous LOOP + 1 + // Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. + // Note that CASE labels fall through. + // Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. + // If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH + // If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it. 
+ static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0); - }; - } -} \ No newline at end of file + static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0); + }; +} +} diff --git a/src/internal_includes/DataTypeAnalysis.h b/src/internal_includes/DataTypeAnalysis.h index 8c0207b..e01eb18 100644 --- a/src/internal_includes/DataTypeAnalysis.h +++ b/src/internal_includes/DataTypeAnalysis.h @@ -8,8 +8,8 @@ struct Instruction; namespace HLSLcc { - namespace DataTypeAnalysis - { - void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector &instructions, uint32_t ui32TempCount, std::vector &results); - }; -}; +namespace DataTypeAnalysis +{ + void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector &instructions, uint32_t ui32TempCount, std::vector &results); +} +} diff --git a/src/internal_includes/Declaration.h b/src/internal_includes/Declaration.h index a9123c1..cb3e446 100644 --- a/src/internal_includes/Declaration.h +++ b/src/internal_includes/Declaration.h @@ -1,4 +1,3 @@ - #pragma once #include @@ -6,11 +5,12 @@ #include "internal_includes/tokens.h" #include "internal_includes/Operand.h" -typedef struct ICBVec4_TAG { - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; +typedef struct ICBVec4_TAG +{ + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; } ICBVec4; #define ACCESS_FLAG_READ 0x1 @@ -18,85 +18,86 @@ typedef struct ICBVec4_TAG { struct Declaration { - Declaration() - : - eOpcode(OPCODE_INVALID), - ui32NumOperands(0), - ui32BufferStride(0) - {} - - OPCODE_TYPE eOpcode; - - uint32_t ui32NumOperands; - - Operand asOperands[2]; - - std::vector asImmediateConstBuffer; - //The declaration can set one of these - //values depending on the opcode. 
- union { - uint32_t ui32GlobalFlags; - uint32_t ui32NumTemps; - RESOURCE_DIMENSION eResourceDimension; - INTERPOLATION_MODE eInterpolation; - PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology; - PRIMITIVE eInputPrimitive; - uint32_t ui32MaxOutputVertexCount; - TESSELLATOR_DOMAIN eTessDomain; - TESSELLATOR_PARTITIONING eTessPartitioning; - TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; - uint32_t aui32WorkGroupSize[3]; - uint32_t ui32HullPhaseInstanceCount; - float fMaxTessFactor; - uint32_t ui32IndexRange; - uint32_t ui32GSInstanceCount; - SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode. - - struct Interface_TAG - { - uint32_t ui32InterfaceID; - uint32_t ui32NumFuncTables; - uint32_t ui32ArraySize; - } iface; - } value; - - uint32_t ui32BufferStride; - - struct UAV_TAG - { - UAV_TAG() : - ui32GloballyCoherentAccess(0), - bCounter(0), - Type(RETURN_TYPE_UNORM), - ui32NumComponents(0), - ui32AccessFlags(0) - { - } - uint32_t ui32GloballyCoherentAccess; - uint8_t bCounter; - RESOURCE_RETURN_TYPE Type; - uint32_t ui32NumComponents; - uint32_t ui32AccessFlags; - } sUAV; - - struct TGSM_TAG - { - uint32_t ui32Stride; - uint32_t ui32Count; - } sTGSM; - - struct IndexableTemp_TAG - { - uint32_t ui32RegIndex; - uint32_t ui32RegCount; - uint32_t ui32RegComponentSize; - } sIdxTemp; - - uint32_t ui32TableLength; - - uint32_t ui32IsShadowTex; - - // Set indexed by sampler register number. - std::set samplersUsed; + Declaration() + : + eOpcode(OPCODE_INVALID), + ui32NumOperands(0), + ui32BufferStride(0) + {} + + OPCODE_TYPE eOpcode; + + uint32_t ui32NumOperands; + + Operand asOperands[2]; + + std::vector asImmediateConstBuffer; + //The declaration can set one of these + //values depending on the opcode. 
+ union + { + uint32_t ui32GlobalFlags; + uint32_t ui32NumTemps; + RESOURCE_DIMENSION eResourceDimension; + INTERPOLATION_MODE eInterpolation; + PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology; + PRIMITIVE eInputPrimitive; + uint32_t ui32MaxOutputVertexCount; + TESSELLATOR_DOMAIN eTessDomain; + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + uint32_t aui32WorkGroupSize[3]; + uint32_t ui32HullPhaseInstanceCount; + float fMaxTessFactor; + uint32_t ui32IndexRange; + uint32_t ui32GSInstanceCount; + SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode. + + struct Interface_TAG + { + uint32_t ui32InterfaceID; + uint32_t ui32NumFuncTables; + uint32_t ui32ArraySize; + } iface; + } value; + + uint32_t ui32BufferStride; + + struct UAV_TAG + { + UAV_TAG() : + ui32GloballyCoherentAccess(0), + bCounter(0), + Type(RETURN_TYPE_UNORM), + ui32NumComponents(0), + ui32AccessFlags(0) + { + } + + uint32_t ui32GloballyCoherentAccess; + uint8_t bCounter; + RESOURCE_RETURN_TYPE Type; + uint32_t ui32NumComponents; + uint32_t ui32AccessFlags; + } sUAV; + + struct TGSM_TAG + { + uint32_t ui32Stride; + uint32_t ui32Count; + } sTGSM; + + struct IndexableTemp_TAG + { + uint32_t ui32RegIndex; + uint32_t ui32RegCount; + uint32_t ui32RegComponentSize; + } sIdxTemp; + + uint32_t ui32TableLength; + + uint32_t ui32IsShadowTex; + + // Set indexed by sampler register number. 
+ std::set samplersUsed; }; - diff --git a/src/internal_includes/HLSLCrossCompilerContext.h b/src/internal_includes/HLSLCrossCompilerContext.h index 50198d5..29be214 100644 --- a/src/internal_includes/HLSLCrossCompilerContext.h +++ b/src/internal_includes/HLSLCrossCompilerContext.h @@ -15,48 +15,52 @@ class HLSLccReflection; class HLSLCrossCompilerContext { public: - HLSLCrossCompilerContext(HLSLccReflection &refl) : m_Reflection(refl) {} + HLSLCrossCompilerContext(HLSLccReflection &refl) : m_Reflection(refl) {} - bstring glsl; - bstring extensions; - bstring beforeMain; + bstring glsl; + bstring extensions; + bstring beforeMain; - bstring* currentGLSLString;//either glsl or earlyMain of current phase + bstring* currentGLSLString;//either glsl or earlyMain of current phase - uint32_t currentPhase; + uint32_t currentPhase; - int indent; - unsigned int flags; + int indent; + unsigned int flags; - // Helper functions for checking flags - // Returns true if VULKAN_BINDINGS flag is set - bool IsVulkan() const; + // Helper functions for checking flags + // Returns true if VULKAN_BINDINGS flag is set + bool IsVulkan() const; - Shader* psShader; - GLSLCrossDependencyData* psDependencies; - const char *inputPrefix; // Prefix for shader inputs - const char *outputPrefix; // Prefix for shader outputs + // Helper functions for checking flags + // Returns true if HLSLCC_FLAG_NVN_TARGET flag is set + bool IsSwitch() const; - void DoDataTypeAnalysis(ShaderPhase *psPhase); + Shader* psShader; + GLSLCrossDependencyData* psDependencies; + const char *inputPrefix; // Prefix for shader inputs + const char *outputPrefix; // Prefix for shader outputs - void ClearDependencyData(); + void DoDataTypeAnalysis(ShaderPhase *psPhase); - void AddIndentation(); + void ClearDependencyData(); - // Currently active translator - Translator *psTranslator; + void AddIndentation(); - HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info + // Currently active translator + 
Translator *psTranslator; - // Retrieve the name for which the input or output is declared as. Takes into account possible redirections. - std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const; - std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const; + HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info - bool OutputNeedsDeclaring(const Operand* psOperand, const int count); + // Retrieve the name for which the input or output is declared as. Takes into account possible redirections. + std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const; + std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const; - bool RequireExtension(const std::string &extName); - bool EnableExtension(const std::string &extName); + bool OutputNeedsDeclaring(const Operand* psOperand, const int count); + + bool RequireExtension(const std::string &extName); + bool EnableExtension(const std::string &extName); private: - std::set m_EnabledExtensions; + std::set m_EnabledExtensions; }; diff --git a/src/internal_includes/HLSLccToolkit.h b/src/internal_includes/HLSLccToolkit.h index 1fed038..e9ce772 100644 --- a/src/internal_includes/HLSLccToolkit.h +++ b/src/internal_includes/HLSLccToolkit.h @@ -13,117 +13,118 @@ struct ConstantBuffer; namespace HLSLcc { - uint32_t GetNumberBitsSet(uint32_t a); + uint32_t GetNumberBitsSet(uint32_t a); - uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType); + uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType); - SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags); + SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags); - const char * GetConstructorForType(const HLSLCrossCompilerContext 
*psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true); + const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true); - const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision); + const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision); - const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components); + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components); - std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows); + std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows); - void AddSwizzleUsingElementCount(bstring dest, uint32_t count); + void AddSwizzleUsingElementCount(bstring dest, uint32_t count); - int WriteMaskToComponentCount(uint32_t writeMask); + int WriteMaskToComponentCount(uint32_t writeMask); - uint32_t BuildComponentMaskFromElementCount(int count); + uint32_t BuildComponentMaskFromElementCount(int count); - // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) - bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src); + // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) + bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src); - // Convert resource return type to SVT_ flags - uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType); + // Convert resource return type to SVT_ flags + uint32_t 
ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType); - SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec); + SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec); - uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount); + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount); - bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode); + bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode); - bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB); + bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB); - int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim); + int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim); - SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b); + SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b); - // Returns true if the instruction adds 1 to the destination temp register - bool IsAddOneInstruction(const Instruction *psInst); + // Returns true if the instruction adds 1 to the destination temp register + bool IsAddOneInstruction(const Instruction *psInst); - bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); + bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf); - // Helper function to print floats with full precision - void PrintFloat(bstring b, float f); + // Helper function to print floats with full precision + void PrintFloat(bstring b, float f); - // Flags for ForeachOperand - // Process suboperands + bstring GetEarlyMain(HLSLCrossCompilerContext *psContext); + bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext); + + // Flags for ForeachOperand + // 
Process suboperands #define FEO_FLAG_SUBOPERAND 1 - // Process src operands + // Process src operands #define FEO_FLAG_SRC_OPERAND 2 - // Process destination operands + // Process destination operands #define FEO_FLAG_DEST_OPERAND 4 - // Convenience: Process all operands, both src and dest, and all suboperands + // Convenience: Process all operands, both src and dest, and all suboperands #define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND) - // For_each for all operands within a range of instructions. Flags above. - template void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback) - { - ItrType inst = _begin; - while (inst != _end) - { - uint32_t i, k; - - if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) - { - for (i = 0; i < inst->ui32FirstSrc; i++) - { - if (flags & FEO_FLAG_SUBOPERAND) - { - for (k = 0; k < MAX_SUB_OPERANDS; k++) - { - if (inst->asOperands[i].m_SubOperands[k].get()) - { - callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); - } - } - } - if (flags & FEO_FLAG_DEST_OPERAND) - { - callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND); - } - } - } - - if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) - { - for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++) - { - if (flags & FEO_FLAG_SUBOPERAND) - { - for (k = 0; k < MAX_SUB_OPERANDS; k++) - { - if (inst->asOperands[i].m_SubOperands[k].get()) - { - callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); - } - } - } - if (flags & FEO_FLAG_SRC_OPERAND) - { - callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND); - } - } - } - - inst++; - } - } - - -}; + // For_each for all operands within a range of instructions. Flags above. 
+ template void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback) + { + ItrType inst = _begin; + while (inst != _end) + { + uint32_t i, k; + + if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) + { + for (i = 0; i < inst->ui32FirstSrc; i++) + { + if (flags & FEO_FLAG_SUBOPERAND) + { + for (k = 0; k < MAX_SUB_OPERANDS; k++) + { + if (inst->asOperands[i].m_SubOperands[k].get()) + { + callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); + } + } + } + if (flags & FEO_FLAG_DEST_OPERAND) + { + callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND); + } + } + } + + if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) + { + for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++) + { + if (flags & FEO_FLAG_SUBOPERAND) + { + for (k = 0; k < MAX_SUB_OPERANDS; k++) + { + if (inst->asOperands[i].m_SubOperands[k].get()) + { + callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); + } + } + } + if (flags & FEO_FLAG_SRC_OPERAND) + { + callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND); + } + } + } + + inst++; + } + } +} diff --git a/src/internal_includes/Instruction.h b/src/internal_includes/Instruction.h index 50677c9..f6a32e3 100644 --- a/src/internal_includes/Instruction.h +++ b/src/internal_includes/Instruction.h @@ -21,134 +21,134 @@ struct Instruction { - Instruction() - : eOpcode(OPCODE_NOP) - , eBooleanTestType(INSTRUCTION_TEST_ZERO) - , ui32NumOperands(0) - , ui32FirstSrc(0) - , m_Uses() - , m_SkipTranslation(false) - , m_InductorRegister(0) - , bSaturate(0) - , m_IsStaticBranch(false) - , m_StaticBranchCondition(NULL) - { - m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; - } - - // For creating unit tests only. 
Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT) - Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) - { - id = _id; - eOpcode = opcode; - eBooleanTestType = INSTRUCTION_TEST_ZERO; + Instruction() + : eOpcode(OPCODE_NOP) + , eBooleanTestType(INSTRUCTION_TEST_ZERO) + , ui32NumOperands(0) + , ui32FirstSrc(0) + , m_Uses() + , m_SkipTranslation(false) + , m_InductorRegister(0) + , bSaturate(0) + , m_IsStaticBranch(false) + , m_StaticBranchCondition(NULL) + { + m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; + } + + // For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT) + Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) + { + id = _id; + eOpcode = opcode; + eBooleanTestType = INSTRUCTION_TEST_ZERO; ui32FirstSrc = 0; - ui32NumOperands = 0; - m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; - m_SkipTranslation = false; - m_InductorRegister = 0; - - if (reg1Mask == 0) - return; - - ui32NumOperands++; - asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP; - asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1; - asOperands[0].ui32CompMask = reg1Mask; - asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - - if (reg2Mask == 0) - return; - - ui32FirstSrc = 1; - ui32NumOperands++; - - asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; - asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 
0 : reg2; - asOperands[1].ui32CompMask = reg2Mask; - asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - - if (reg3Mask == 0) - return; - ui32NumOperands++; - - asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; - asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3; - asOperands[2].ui32CompMask = reg3Mask; - asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - - if (reg4Mask == 0) - return; - ui32NumOperands++; - - asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; - asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4; - asOperands[3].ui32CompMask = reg4Mask; - asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - } - - // Returns true if this instruction is a conditional branch - bool IsConditionalBranchInstruction() const - { - switch (eOpcode) - { - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CONTINUEC: - case OPCODE_RETC: - return true; - default: - return false; - } - } - - bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const; - - // Flags for ChangeOperandTempRegister + ui32NumOperands = 0; + m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; + m_SkipTranslation = false; + m_InductorRegister = 0; + + if (reg1Mask == 0) + return; + + ui32NumOperands++; + asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP; + asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1; + asOperands[0].ui32CompMask = reg1Mask; + asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg2Mask == 0) + return; + + ui32FirstSrc = 1; + ui32NumOperands++; + + asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 
0 : reg2; + asOperands[1].ui32CompMask = reg2Mask; + asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg3Mask == 0) + return; + ui32NumOperands++; + + asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3; + asOperands[2].ui32CompMask = reg3Mask; + asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg4Mask == 0) + return; + ui32NumOperands++; + + asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4; + asOperands[3].ui32CompMask = reg4Mask; + asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + } + + // Returns true if this instruction is a conditional branch + bool IsConditionalBranchInstruction() const + { + switch (eOpcode) + { + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + return true; + default: + return false; + } + } + + bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const; + + // Flags for ChangeOperandTempRegister #define UD_CHANGE_SUBOPERANDS 1 #define UD_CHANGE_MAIN_OPERAND 2 #define UD_CHANGE_ALL 3 - void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase); + void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase); - OPCODE_TYPE eOpcode; - INSTRUCTION_TEST_BOOLEAN eBooleanTestType; - uint32_t ui32SyncFlags; - uint32_t ui32NumOperands; - uint32_t ui32FirstSrc; - Operand asOperands[6]; - uint32_t bSaturate; - uint32_t ui32FuncIndexWithinInterface; - RESINFO_RETURN_TYPE eResInfoReturnType; + OPCODE_TYPE eOpcode; + INSTRUCTION_TEST_BOOLEAN eBooleanTestType; + uint32_t ui32SyncFlags; + uint32_t ui32NumOperands; + uint32_t ui32FirstSrc; + Operand asOperands[6]; + uint32_t bSaturate; + 
uint32_t ui32FuncIndexWithinInterface; + RESINFO_RETURN_TYPE eResInfoReturnType; - int bAddressOffset; - int8_t iUAddrOffset; - int8_t iVAddrOffset; - int8_t iWAddrOffset; - RESOURCE_RETURN_TYPE xType, yType, zType, wType; - RESOURCE_DIMENSION eResDim; - int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking + int bAddressOffset; + int8_t iUAddrOffset; + int8_t iVAddrOffset; + int8_t iWAddrOffset; + RESOURCE_RETURN_TYPE xType, yType, zType, wType; + RESOURCE_DIMENSION eResDim; + int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking - bool m_IsStaticBranch; // If true, this instruction is a static branch - const Instruction *m_StaticBranchCondition; // If this is a static branch, this instruction points to the condition instruction. Can also be NULL if the operand itself is the condition - std::string m_StaticBranchName; // The name of the static branch variable, with the condition encoded in it. + bool m_IsStaticBranch; // If true, this instruction is a static branch + const Instruction *m_StaticBranchCondition; // If this is a static branch, this instruction points to the condition instruction. Can also be NULL if the operand itself is the condition + std::string m_StaticBranchName; // The name of the static branch variable, with the condition encoded in it. - struct Use - { - Use() : m_Inst(0), m_Op(0) {} - Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} - Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} + struct Use + { + Use() : m_Inst(0), m_Op(0) {} + Use(const Use &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} + Use(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} - Instruction *m_Inst; // The instruction that references the result of this instruction - Operand *m_Op; // The operand within the instruction above. Note: can also be suboperand. 
- }; + Instruction *m_Inst; // The instruction that references the result of this instruction + Operand *m_Op; // The operand within the instruction above. Note: can also be suboperand. + }; - std::vector m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg. + std::vector m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg. - Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. - bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation) - uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it + Instruction *m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. 
+ bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation) + uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it - uint64_t id; + uint64_t id; }; diff --git a/src/internal_includes/LoopTransform.h b/src/internal_includes/LoopTransform.h index c3b0fc4..dacec4b 100644 --- a/src/internal_includes/LoopTransform.h +++ b/src/internal_includes/LoopTransform.h @@ -1,9 +1,8 @@ - #pragma once class ShaderPhase; class HLSLCrossCompilerContext; namespace HLSLcc { - void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase); -}; + void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase); +} diff --git a/src/internal_includes/Operand.h b/src/internal_includes/Operand.h index 701d505..e1e91a2 100644 --- a/src/internal_includes/Operand.h +++ b/src/internal_includes/Operand.h @@ -4,7 +4,7 @@ #include #include -enum{ MAX_SUB_OPERANDS = 3 }; +enum { MAX_SUB_OPERANDS = 3 }; class Operand; class HLSLCrossCompilerContext; struct Instruction; @@ -17,130 +17,129 @@ struct Instruction; class Operand { public: - typedef std::shared_ptr SubOperandPtr; - - Operand() - : - iExtended(), - eType(), - eModifier(), - eMinPrecision(), - iIndexDims(), - iWriteMask(), - iGSInput(), - iPSInOut(), - iWriteMaskEnabled(), - iArrayElements(), - iNumComponents(), - eSelMode(), - ui32CompMask(), - ui32Swizzle(), - aui32Swizzle(), - aui32ArraySizes(), - ui32RegisterNumber(), - afImmediates(), - adImmediates(), - eSpecialName(), - specialName(), - eIndexRep(), - m_SubOperands(), - aeDataType(), - m_Rebase(0), - m_Size(0), - m_Defines(), - m_ForLoopInductorName(0) + typedef std::shared_ptr SubOperandPtr; + + Operand() + : + iExtended(), + eType(), + eModifier(), + eMinPrecision(), + iIndexDims(), + iWriteMask(), + iGSInput(), + iPSInOut(), + iWriteMaskEnabled(), + iArrayElements(), + iNumComponents(), + eSelMode(), + 
ui32CompMask(), + ui32Swizzle(), + aui32Swizzle(), + aui32ArraySizes(), + ui32RegisterNumber(), + afImmediates(), + adImmediates(), + eSpecialName(), + specialName(), + eIndexRep(), + m_SubOperands(), + aeDataType(), + m_Rebase(0), + m_Size(0), + m_Defines(), + m_ForLoopInductorName(0) #ifdef _DEBUG - , id(0) + , id(0) #endif - {} + {} - // Retrieve the mask of all the components this operand accesses (either reads from or writes to). - // Note that destination writemask does affect the effective access mask. - uint32_t GetAccessMask() const; - - // Returns the index of the highest accessed component, based on component mask - int GetMaxComponent() const; + // Retrieve the mask of all the components this operand accesses (either reads from or writes to). + // Note that destination writemask does affect the effective access mask. + uint32_t GetAccessMask() const; - bool IsSwizzleReplicated() const; + // Returns the index of the highest accessed component, based on component mask + int GetMaxComponent() const; - // Get the number of elements returned by operand, taking additional component mask into account - //e.g. - //.z = 1 - //.x = 1 - //.yw = 2 - uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const; + bool IsSwizzleReplicated() const; - // When this operand is used as an input declaration, how many components does it have? - int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const; + // Get the number of elements returned by operand, taking additional component mask into account + //e.g. + //.z = 1 + //.x = 1 + //.yw = 2 + uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const; - // Retrieve the operand data type. - SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const; + // When this operand is used as an input declaration, how many components does it have? 
+ int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const; - // Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch - int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const; - // Same as above but with explicit shader type and phase - int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; + // Retrieve the operand data type. + SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const; + + // Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch + int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const; + // Same as above but with explicit shader type and phase + int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; // Find the operand that contains the dynamic index for this operand (array in constant buffer). // When isAoS is true, we'll try to find the original index var to avoid additional calculations. // needsIndexCalcRevert output will tell if we need to divide the value to get the correct index. 
Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const; - // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible - static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); + // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible + static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); - int iExtended; - OPERAND_TYPE eType; - OPERAND_MODIFIER eModifier; - OPERAND_MIN_PRECISION eMinPrecision; - int iIndexDims; - int iWriteMask; - int iGSInput; - int iPSInOut; - int iWriteMaskEnabled; - int iArrayElements; - int iNumComponents; + int iExtended; + OPERAND_TYPE eType; + OPERAND_MODIFIER eModifier; + OPERAND_MIN_PRECISION eMinPrecision; + int iIndexDims; + int iWriteMask; + int iGSInput; + int iPSInOut; + int iWriteMaskEnabled; + int iArrayElements; + int iNumComponents; - OPERAND_4_COMPONENT_SELECTION_MODE eSelMode; - uint32_t ui32CompMask; - uint32_t ui32Swizzle; - uint32_t aui32Swizzle[4]; + OPERAND_4_COMPONENT_SELECTION_MODE eSelMode; + uint32_t ui32CompMask; + uint32_t ui32Swizzle; + uint32_t aui32Swizzle[4]; - uint32_t aui32ArraySizes[3]; - uint32_t ui32RegisterNumber; - //If eType is OPERAND_TYPE_IMMEDIATE32 - float afImmediates[4]; - //If eType is OPERAND_TYPE_IMMEDIATE64 - double adImmediates[4]; + uint32_t aui32ArraySizes[3]; + uint32_t ui32RegisterNumber; + //If eType is OPERAND_TYPE_IMMEDIATE32 + float afImmediates[4]; + //If eType is OPERAND_TYPE_IMMEDIATE64 + double adImmediates[4]; - SPECIAL_NAME eSpecialName; - std::string specialName; + SPECIAL_NAME eSpecialName; + std::string specialName; - OPERAND_INDEX_REPRESENTATION eIndexRep[3]; + OPERAND_INDEX_REPRESENTATION eIndexRep[3]; - SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS]; + SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS]; - //One type for each component. 
- SHADER_VARIABLE_TYPE aeDataType[4]; + //One type for each component. + SHADER_VARIABLE_TYPE aeDataType[4]; - uint32_t m_Rebase; // Rebase value, for constant array accesses. - uint32_t m_Size; // Component count, only for constant array access. + uint32_t m_Rebase; // Rebase value, for constant array accesses. + uint32_t m_Size; // Component count, only for constant array access. - struct Define - { - Define() : m_Inst(0), m_Op(0) {} - Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} - Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} + struct Define + { + Define() : m_Inst(0), m_Op(0) {} + Define(const Define &a) : m_Inst(a.m_Inst), m_Op(a.m_Op) {} + Define(Instruction *inst, Operand *op) : m_Inst(inst), m_Op(op) {} - Instruction *m_Inst; // Instruction that writes to the temp - Operand *m_Op; // The (destination) operand within that instruction. - }; + Instruction *m_Inst; // Instruction that writes to the temp + Operand *m_Op; // The (destination) operand within that instruction. + }; - std::vector m_Defines; // Array of instructions whose results this operand can use. (only if eType == OPERAND_TYPE_TEMP) - uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber) + std::vector m_Defines; // Array of instructions whose results this operand can use. 
(only if eType == OPERAND_TYPE_TEMP) + uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber) #ifdef _DEBUG - uint64_t id; + uint64_t id; #endif }; - diff --git a/src/internal_includes/Shader.h b/src/internal_includes/Shader.h index 26eabee..5c2eee0 100644 --- a/src/internal_includes/Shader.h +++ b/src/internal_includes/Shader.h @@ -1,4 +1,3 @@ - #pragma once #include @@ -16,251 +15,252 @@ struct ConstantArrayChunk { - ConstantArrayChunk() : m_Size(0), m_AccessMask(0) {} - ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse) - : m_Size(sz), m_AccessMask(mask) - { - m_UseSites.push_back(firstUse); - } - - uint32_t m_Size; - uint32_t m_AccessMask; - uint32_t m_Rebase; - uint32_t m_ComponentCount; - - std::vector m_UseSites; + ConstantArrayChunk() : m_Size(0), m_AccessMask(0) {} + ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse) + : m_Size(sz), m_AccessMask(mask) + { + m_UseSites.push_back(firstUse); + } + + uint32_t m_Size; + uint32_t m_AccessMask; + uint32_t m_Rebase; + uint32_t m_ComponentCount; + + std::vector m_UseSites; }; typedef std::multimap ChunkMap; struct ConstantArrayInfo { - ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {} + ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {} - Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array - ChunkMap m_Chunks; // map of , same start offset might have multiple entries for different access masks + Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array + ChunkMap m_Chunks; // map of , same start offset might have multiple entries for different access masks }; class ShaderPhase { public: - ShaderPhase() - : - ePhase(MAIN_PHASE), - ui32InstanceCount(0), - postShaderCode(), - hasPostShaderCode(0), - earlyMain(), - ui32OrigTemps(0), - ui32TotalTemps(0), - 
psTempDeclaration(NULL), - pui32SplitInfo(), - peTempTypes(), - acInputNeedsRedirect(), - acOutputNeedsRedirect(), - acPatchConstantsNeedsRedirect(), - m_CFG(), - m_CFGInitialized(false), - m_NextFreeTempRegister(1), - m_NextTexCoordTemp(0) - {} - - void ResolveUAVProperties(); - - void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier - - void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller - - void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first! - - ConstantArrayInfo m_ConstantArrayInfo; - - std::vector psDecl; - std::vector psInst; - - SHADER_PHASE_TYPE ePhase; - uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1. - bstring postShaderCode;//End of main or before emit() - int hasPostShaderCode; - - bstring earlyMain;//Code to be inserted at the start of phase - - uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared - uint32_t ui32TotalTemps; // The number of temporaries this phase has now - Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode - - // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff - // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count - std::vector pui32SplitInfo; - std::vector peTempTypes; - - // These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together. - std::vector acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared. 
- std::vector acOutputNeedsRedirect; // Same for outputs - std::vector acPatchConstantsNeedsRedirect; // Same for patch constants - - // Get the Control Flow Graph for this phase, build it if necessary. - HLSLcc::ControlFlow::ControlFlowGraph &GetCFG(); - - uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops. - uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds - - // Instructions that are static branches (branches based on constant buffer values only) - std::vector m_StaticBranchInstructions; + ShaderPhase() + : + ePhase(MAIN_PHASE), + ui32InstanceCount(0), + postShaderCode(), + hasPostShaderCode(0), + earlyMain(), + ui32OrigTemps(0), + ui32TotalTemps(0), + psTempDeclaration(NULL), + pui32SplitInfo(), + peTempTypes(), + acInputNeedsRedirect(), + acOutputNeedsRedirect(), + acPatchConstantsNeedsRedirect(), + m_CFG(), + m_CFGInitialized(false), + m_NextFreeTempRegister(1), + m_NextTexCoordTemp(0) + {} + + void ResolveUAVProperties(); + + void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier + + void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller + + void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first! + + ConstantArrayInfo m_ConstantArrayInfo; + + std::vector psDecl; + std::vector psInst; + + SHADER_PHASE_TYPE ePhase; + uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1. 
+ bstring postShaderCode;//End of main or before emit() + int hasPostShaderCode; + + bstring earlyMain;//Code to be inserted at the start of phase + + uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared + uint32_t ui32TotalTemps; // The number of temporaries this phase has now + Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode + + // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff + // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count + std::vector pui32SplitInfo; + std::vector peTempTypes; + + // These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together. + std::vector acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared. + std::vector acOutputNeedsRedirect; // Same for outputs + std::vector acPatchConstantsNeedsRedirect; // Same for patch constants + + // Get the Control Flow Graph for this phase, build it if necessary. + HLSLcc::ControlFlow::ControlFlowGraph &GetCFG(); + + uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops. 
+ uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds + + // Instructions that are static branches (branches based on constant buffer values only) + std::vector m_StaticBranchInstructions; private: - bool m_CFGInitialized; - HLSLcc::ControlFlow::ControlFlowGraph m_CFG; + bool m_CFGInitialized; + HLSLcc::ControlFlow::ControlFlowGraph m_CFG; }; class Shader { public: - Shader() - : - ui32MajorVersion(0), - ui32MinorVersion(0), - eShaderType(INVALID_SHADER), - eTargetLanguage(LANG_DEFAULT), - extensions(0), - fp64(0), - ui32ShaderLength(0), - aui32FuncTableToFuncPointer(), - aui32FuncBodyToFuncTable(), - funcTable(), - funcPointer(), - ui32NextClassFuncName(), - pui32FirstToken(NULL), - asPhases(), - sInfo(), - abScalarInput(), - abScalarOutput(), - aIndexedInput(), - aIndexedOutput(), - aIndexedInputParents(), - aeResourceDims(), - acInputDeclared(), - acOutputDeclared(), - aiOpcodeUsed(NUM_OPCODES, 0), - ui32CurrentVertexOutputStream(0), - textureSamplers(), - aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0), - ui32CurrentStructuredBufferIndex(), - m_DummySamplerDeclared(false) - { - } - - // Retrieve the number of components the temp register has. - uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const; - - //Hull shaders have multiple phases. - //Each phase has its own temps. - //Convert from per-phase temps to global temps. - void ConsolidateHullTempVars(); - - // Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list - void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase); - - // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. - // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. 
- // In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. - void PrepareStructuredBufferBindingSlots(); - - // Detect temp registers per data type that are actually used. - void PruneTempRegisters(); - - // Check if inputs and outputs are accessed across semantic boundaries - // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. - void AnalyzeIOOverlap(); - - // Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs. - void ForcePositionToHighp(); - - void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used. - - void ExpandSWAPCs(); - - uint32_t ui32MajorVersion; - uint32_t ui32MinorVersion; - SHADER_TYPE eShaderType; - - GLLang eTargetLanguage; - const struct GlExtensions *extensions; - - int fp64; - - //DWORDs in program code, including version and length tokens. - uint32_t ui32ShaderLength; - - - //Instruction* functions;//non-main subroutines - HLSLcc::growing_vector aui32FuncTableToFuncPointer; // dynamic alloc? - HLSLcc::growing_vector aui32FuncBodyToFuncTable; - - struct FuncTableEntry{ - HLSLcc::growing_vector aui32FuncBodies; - }; - HLSLcc::growing_vector funcTable; - - struct FuncPointerEntry { - HLSLcc::growing_vector aui32FuncTables; - uint32_t ui32NumBodiesPerTable; - }; - - HLSLcc::growing_vector funcPointer; - - HLSLcc::growing_vector ui32NextClassFuncName; - - const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream. - - std::vector asPhases; - - ShaderInfo sInfo; - - // There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex. 
- // Which one is used depends on the context: - // per-vertex space is used in vertex/pixel/geom shaders always - // hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT) - // domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT - - // Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch - // Note that these ints are component masks - HLSLcc::growing_vector abScalarInput[2]; - HLSLcc::growing_vector abScalarOutput[2]; + Shader() + : + ui32MajorVersion(0), + ui32MinorVersion(0), + eShaderType(INVALID_SHADER), + eTargetLanguage(LANG_DEFAULT), + extensions(0), + fp64(0), + ui32ShaderLength(0), + aui32FuncTableToFuncPointer(), + aui32FuncBodyToFuncTable(), + funcTable(), + funcPointer(), + ui32NextClassFuncName(), + pui32FirstToken(NULL), + asPhases(), + sInfo(), + abScalarInput(), + abScalarOutput(), + aIndexedInput(), + aIndexedOutput(), + aIndexedInputParents(), + aeResourceDims(), + acInputDeclared(), + acOutputDeclared(), + aiOpcodeUsed(NUM_OPCODES, 0), + ui32CurrentVertexOutputStream(0), + textureSamplers(), + aui32StructuredBufferBindingPoints(MAX_RESOURCE_BINDINGS, 0), + ui32CurrentStructuredBufferIndex(), + m_DummySamplerDeclared(false) + { + } - HLSLcc::growing_vector aIndexedInput[2]; - HLSLcc::growing_vector aIndexedOutput[2]; + // Retrieve the number of components the temp register has. + uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const; - HLSLcc::growing_vector aIndexedInputParents[2]; + //Hull shaders have multiple phases. + //Each phase has its own temps. + //Convert from per-phase temps to global temps. 
+ void ConsolidateHullTempVars(); + + // Go through all declarations and remove UAV occupied binding points from the aui32StructuredBufferBindingPoints list + void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase); + + // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. + // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. + // In this step make aui32StructuredBufferBindingPoints contain increasingly ordered uints starting from zero. + void PrepareStructuredBufferBindingSlots(); - HLSLcc::growing_vector aeResourceDims; + // Detect temp registers per data type that are actually used. + void PruneTempRegisters(); - HLSLcc::growing_vector acInputDeclared[2]; - HLSLcc::growing_vector acOutputDeclared[2]; + // Check if inputs and outputs are accessed across semantic boundaries + // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. + void AnalyzeIOOverlap(); - std::vector aiOpcodeUsed; // Initialized to NUM_OPCODES elements above. + // Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs. + void ForcePositionToHighp(); - uint32_t ui32CurrentVertexOutputStream; + void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used. 
- TextureSamplerPairs textureSamplers; + void ExpandSWAPCs(); - std::vector aui32StructuredBufferBindingPoints; - uint32_t ui32CurrentStructuredBufferIndex; + uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + SHADER_TYPE eShaderType; - std::vector psIntTempSizes; // Array for whether this temp register needs declaration as int temp - std::vector psInt16TempSizes; // min16ints - std::vector psInt12TempSizes; // min12ints - std::vector psUIntTempSizes; // Same for uints - std::vector psUInt16TempSizes; // ... and for uint16's - std::vector psFloatTempSizes; // ...and for floats - std::vector psFloat16TempSizes; // ...and for min16floats - std::vector psFloat10TempSizes; // ...and for min10floats - std::vector psDoubleTempSizes; // ...and for doubles - std::vector psBoolTempSizes; // ... and for bools + GLLang eTargetLanguage; + const struct GlExtensions *extensions; - bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that. + int fp64; -private: - void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); + //DWORDs in program code, including version and length tokens. + uint32_t ui32ShaderLength; + + + //Instruction* functions;//non-main subroutines + HLSLcc::growing_vector aui32FuncTableToFuncPointer; // dynamic alloc? + HLSLcc::growing_vector aui32FuncBodyToFuncTable; + + struct FuncTableEntry + { + HLSLcc::growing_vector aui32FuncBodies; + }; + HLSLcc::growing_vector funcTable; + + struct FuncPointerEntry + { + HLSLcc::growing_vector aui32FuncTables; + uint32_t ui32NumBodiesPerTable; + }; + + HLSLcc::growing_vector funcPointer; + + HLSLcc::growing_vector ui32NextClassFuncName; + + const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream. + + std::vector asPhases; + + ShaderInfo sInfo; + + // There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex. 
+ // Which one is used depends on the context: + // per-vertex space is used in vertex/pixel/geom shaders always + // hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT) + // domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT + + // Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch + // Note that these ints are component masks + HLSLcc::growing_vector abScalarInput[2]; + HLSLcc::growing_vector abScalarOutput[2]; + HLSLcc::growing_vector aIndexedInput[2]; + HLSLcc::growing_vector aIndexedOutput[2]; + + HLSLcc::growing_vector aIndexedInputParents[2]; + + HLSLcc::growing_vector aeResourceDims; + + HLSLcc::growing_vector acInputDeclared[2]; + HLSLcc::growing_vector acOutputDeclared[2]; + + std::vector aiOpcodeUsed; // Initialized to NUM_OPCODES elements above. + + uint32_t ui32CurrentVertexOutputStream; + + TextureSamplerPairs textureSamplers; + + std::vector aui32StructuredBufferBindingPoints; + uint32_t ui32CurrentStructuredBufferIndex; + + std::vector psIntTempSizes; // Array for whether this temp register needs declaration as int temp + std::vector psInt16TempSizes; // min16ints + std::vector psInt12TempSizes; // min12ints + std::vector psUIntTempSizes; // Same for uints + std::vector psUInt16TempSizes; // ... and for uint16's + std::vector psFloatTempSizes; // ...and for floats + std::vector psFloat16TempSizes; // ...and for min16floats + std::vector psFloat10TempSizes; // ...and for min10floats + std::vector psDoubleTempSizes; // ...and for doubles + std::vector psBoolTempSizes; // ... and for bools + + bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that. 
+ +private: + void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); }; diff --git a/src/internal_includes/Translator.h b/src/internal_includes/Translator.h index e41cff5..ae5224a 100644 --- a/src/internal_includes/Translator.h +++ b/src/internal_includes/Translator.h @@ -1,4 +1,3 @@ - #pragma once #include "HLSLCrossCompilerContext.h" #include "Shader.h" @@ -8,28 +7,26 @@ struct Declaration; class Translator { protected: - HLSLCrossCompilerContext *psContext; + HLSLCrossCompilerContext *psContext; public: - explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {} - virtual ~Translator() {} - - virtual bool Translate() = 0; - - virtual void TranslateDeclaration(const Declaration *psDecl) = 0; + explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {} + virtual ~Translator() {} - // Translate system value type to name, return true if succeeded and no further translation is necessary - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0; + virtual bool Translate() = 0; - // In GLSL, the input and output names cannot clash. - // Also, the output name of previous stage must match the input name of the next stage. - // So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. 
- // - virtual void SetIOPrefixes() = 0; + virtual void TranslateDeclaration(const Declaration *psDecl) = 0; - void SetExtensions(const struct GlExtensions *ext) - { - psContext->psShader->extensions = ext; - } + // Translate system value type to name, return true if succeeded and no further translation is necessary + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0; + // In GLSL, the input and output names cannot clash. + // Also, the output name of previous stage must match the input name of the next stage. + // So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. + // + virtual void SetIOPrefixes() = 0; -}; \ No newline at end of file + void SetExtensions(const struct GlExtensions *ext) + { + psContext->psShader->extensions = ext; + } +}; diff --git a/src/internal_includes/UseDefineChains.h b/src/internal_includes/UseDefineChains.h index 3d28279..9c2b582 100644 --- a/src/internal_includes/UseDefineChains.h +++ b/src/internal_includes/UseDefineChains.h @@ -20,102 +20,100 @@ class Operand; class ShaderInfo; namespace HLSLcc { - namespace ControlFlow - { - class ControlFlowGraph; - }; -}; +namespace ControlFlow +{ + class ControlFlowGraph; +} +} // Def-Use chain per temp component struct DefineUseChainEntry { - DefineUseChainEntry() - : psInst(0) - , psOp(0) - , usages() - , writeMask(0) - , index(0) - , isStandalone(0) - { - memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *)); - } - - Instruction *psInst; // The declaration (write to this temp component) - Operand *psOp; // The operand within this instruction for the write target - UsageSet usages; // List of usages that are dependent on this write - uint32_t writeMask; // Access mask; which all components were written to in the same op - uint32_t index; // For which 
component was this definition created for? - uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings - struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components. + DefineUseChainEntry() + : psInst(0) + , psOp(0) + , usages() + , writeMask(0) + , index(0) + , isStandalone(0) + { + memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *)); + } + + Instruction *psInst; // The declaration (write to this temp component) + Operand *psOp; // The operand within this instruction for the write target + UsageSet usages; // List of usages that are dependent on this write + uint32_t writeMask; // Access mask; which all components were written to in the same op + uint32_t index; // For which component was this definition created for? + uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings + struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components. 
#if _DEBUG - bool operator==(const DefineUseChainEntry &a) const - { - if (psInst != a.psInst) - return false; - if (psOp != a.psOp) - return false; - if (writeMask != a.writeMask) - return false; - if (index != a.index) - return false; - if (isStandalone != a.isStandalone) - return false; - - // Just check that each one has the same amount of usages - if (usages.size() != a.usages.size()) - return false; - - return true; - } + bool operator==(const DefineUseChainEntry &a) const + { + if (psInst != a.psInst) + return false; + if (psOp != a.psOp) + return false; + if (writeMask != a.writeMask) + return false; + if (index != a.index) + return false; + if (isStandalone != a.isStandalone) + return false; + + // Just check that each one has the same amount of usages + if (usages.size() != a.usages.size()) + return false; + + return true; + } #endif - }; typedef std::list DefineUseChain; struct UseDefineChainEntry { - UseDefineChainEntry() - : psInst(0) - , psOp(0) - , defines() - , accessMask(0) - , index(0) - { - memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *)); - } - - Instruction *psInst; // The use (read from this temp component) - Operand *psOp; // The operand within this instruction for the read - DefineSet defines; // List of writes that are visible to this read - uint32_t accessMask; // Which all components were read together with this one - uint32_t index; // For which component was this usage created for? - struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components. 
+ UseDefineChainEntry() + : psInst(0) + , psOp(0) + , defines() + , accessMask(0) + , index(0) + { + memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *)); + } + + Instruction *psInst; // The use (read from this temp component) + Operand *psOp; // The operand within this instruction for the read + DefineSet defines; // List of writes that are visible to this read + uint32_t accessMask; // Which all components were read together with this one + uint32_t index; // For which component was this usage created for? + struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components. #if _DEBUG - bool operator==(const UseDefineChainEntry &a) const - { - if (psInst != a.psInst) - return false; - if (psOp != a.psOp) - return false; - if (accessMask != a.accessMask) - return false; - if (index != a.index) - return false; - - // Just check that each one has the same amount of usages - if (defines.size() != a.defines.size()) - return false; - - return true; - } + bool operator==(const UseDefineChainEntry &a) const + { + if (psInst != a.psInst) + return false; + if (psOp != a.psOp) + return false; + if (accessMask != a.accessMask) + return false; + if (index != a.index) + return false; + + // Just check that each one has the same amount of usages + if (defines.size() != a.defines.size()) + return false; + + return true; + } #endif - }; typedef std::list UseDefineChain; @@ -138,4 +136,3 @@ void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32Nu // Write the uses and defines back to Instruction and Operand member lists. 
void WriteBackUsesAndDefines(DefineUseChains &psDUChains); - diff --git a/src/internal_includes/debug.h b/src/internal_includes/debug.h index 1eb3dc2..bc201c0 100644 --- a/src/internal_includes/debug.h +++ b/src/internal_includes/debug.h @@ -6,14 +6,15 @@ #define ASSERT(expr) CustomAssert(expr) static void CustomAssert(int expression) { - if(!expression) + if (!expression) { assert(0); } } + #else #define UNUSED(EXPR_) \ - do { if (false) (void)(EXPR_); } while(0) + do { if (false) (void)(EXPR_); } while(0) #define ASSERT(expr) UNUSED(expr) #endif diff --git a/src/internal_includes/languages.h b/src/internal_includes/languages.h index d6c77c0..b6f58bc 100644 --- a/src/internal_includes/languages.h +++ b/src/internal_includes/languages.h @@ -7,55 +7,55 @@ static int InOutSupported(const GLLang eLang) { - if(eLang == LANG_ES_100 || eLang == LANG_120) - { - return 0; - } - return 1; + if (eLang == LANG_ES_100 || eLang == LANG_120) + { + return 0; + } + return 1; } static int WriteToFragData(const GLLang eLang) { - if(eLang == LANG_ES_100 || eLang == LANG_120) - { - return 1; - } - return 0; + if (eLang == LANG_ES_100 || eLang == LANG_120) + { + return 1; + } + return 0; } static int ShaderBitEncodingSupported(const GLLang eLang) { - if( eLang != LANG_ES_300 && - eLang != LANG_ES_310 && - eLang < LANG_330) - { - return 0; - } - return 1; + if (eLang != LANG_ES_300 && + eLang != LANG_ES_310 && + eLang < LANG_330) + { + return 0; + } + return 1; } static int HaveOverloadedTextureFuncs(const GLLang eLang) { - if(eLang == LANG_ES_100 || eLang == LANG_120) - { - return 0; - } - return 1; + if (eLang == LANG_ES_100 || eLang == LANG_120) + { + return 0; + } + return 1; } //Only enable for ES. Vulkan and Switch. //Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan. 
static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext) { - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0) - return 1; - - const GLLang eLang = psContext->psShader->eTargetLanguage; - if(eLang >= LANG_ES_100 && eLang <= LANG_ES_310) - { - return 1; - } - return 0; + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0) + return 1; + + const GLLang eLang = psContext->psShader->eTargetLanguage; + if (eLang >= LANG_ES_100 && eLang <= LANG_ES_310) + { + return 1; + } + return 0; } static int EmitLowp(const HLSLCrossCompilerContext *psContext) @@ -66,25 +66,25 @@ static int EmitLowp(const HLSLCrossCompilerContext *psContext) static int HaveCubemapArray(const GLLang eLang) { - if (eLang >= LANG_400 && eLang <= LANG_GL_LAST) - return 1; - return 0; + if (eLang >= LANG_400 && eLang <= LANG_GL_LAST) + return 1; + return 0; } static bool IsESLanguage(const GLLang eLang) { - return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST); + return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST); } static bool IsDesktopGLLanguage(const GLLang eLang) { - return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST); + return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST); } //Only on vertex inputs and pixel outputs. 
static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions) { - if(eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location)) + if (eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location)) { return 1; } @@ -93,7 +93,7 @@ static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct Gl static int HaveInOutLocationQualifier(const GLLang eLang) { - if(eLang >= LANG_410 || eLang == LANG_ES_310) + if (eLang >= LANG_410 || eLang == LANG_ES_310) { return 1; } @@ -102,13 +102,13 @@ static int HaveInOutLocationQualifier(const GLLang eLang) //layout(binding = X) uniform {uniformA; uniformB;} //layout(location = X) uniform uniform_name; -static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExtensions *extensions, unsigned int flags) +static int HaveUniformBindingsAndLocations(const GLLang eLang, const struct GlExtensions *extensions, unsigned int flags) { - if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS) - return 0; + if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS) + return 0; - if (eLang >= LANG_430 || eLang == LANG_ES_310 || - (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack)) + if (eLang >= LANG_430 || eLang == LANG_ES_310 || + (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack)) { return 1; } @@ -117,7 +117,7 @@ static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExt static int DualSourceBlendSupported(const GLLang eLang) { - if(eLang >= LANG_330) + if (eLang >= LANG_330) { return 1; } @@ -126,7 +126,7 @@ static int DualSourceBlendSupported(const GLLang eLang) static int 
SubroutinesSupported(const GLLang eLang) { - if(eLang >= LANG_400) + if (eLang >= LANG_400) { return 1; } @@ -138,7 +138,7 @@ static int SubroutinesSupported(const GLLang eLang) //HLSL bytecode only tells us the interpolation in pixel shader. static int PixelInterpDependency(const GLLang eLang) { - if(eLang < LANG_430) + if (eLang < LANG_430) { return 1; } @@ -147,166 +147,165 @@ static int PixelInterpDependency(const GLLang eLang) static int HaveUnsignedTypes(const GLLang eLang) { - switch(eLang) - { - case LANG_ES_100: - case LANG_120: - return 0; - default: - break; - } - return 1; + switch (eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; } static int HaveBitEncodingOps(const GLLang eLang) { - switch(eLang) - { - case LANG_ES_100: - case LANG_120: - return 0; - default: - break; - } - return 1; + switch (eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; } static int HaveNativeBitwiseOps(const GLLang eLang) { - switch(eLang) - { - case LANG_ES_100: - case LANG_120: - return 0; - default: - break; - } - return 1; + switch (eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; } static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL) { - // WebGL only allows dynamic indexing with constant expressions, loop indices or a combination. - // The only exception is for uniform access in vertex shaders, which can be indexed using any expression. - - switch(psContext->psShader->eTargetLanguage) - { - case LANG_ES_100: - case LANG_120: - if (psOperand != NULL) - { - if (psOperand->m_ForLoopInductorName) - return 1; - - if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) - return 1; - } - - return 0; - default: - break; - } - return 1; + // WebGL only allows dynamic indexing with constant expressions, loop indices or a combination. 
+ // The only exception is for uniform access in vertex shaders, which can be indexed using any expression. + + switch (psContext->psShader->eTargetLanguage) + { + case LANG_ES_100: + case LANG_120: + if (psOperand != NULL) + { + if (psOperand->m_ForLoopInductorName) + return 1; + + if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + return 1; + } + + return 0; + default: + break; + } + return 1; } static int HaveGather(const GLLang eLang) { - if(eLang >= LANG_400 || eLang == LANG_ES_310) - { - return 1; - } - return 0; + if (eLang >= LANG_400 || eLang == LANG_ES_310) + { + return 1; + } + return 0; } static int HaveGatherNonConstOffset(const GLLang eLang) { - if(eLang >= LANG_420 || eLang == LANG_ES_310) - { - return 1; - } - return 0; + if (eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; } - static int HaveQueryLod(const GLLang eLang) { - if(eLang >= LANG_400) - { - return 1; - } - return 0; + if (eLang >= LANG_400) + { + return 1; + } + return 0; } static int HaveQueryLevels(const GLLang eLang) { - if(eLang >= LANG_430) - { - return 1; - } - return 0; + if (eLang >= LANG_430) + { + return 1; + } + return 0; } static int HaveFragmentCoordConventions(const GLLang eLang) { - if(eLang >= LANG_150) - { - return 1; - } - return 0; + if (eLang >= LANG_150) + { + return 1; + } + return 0; } static int HaveGeometryShaderARB(const GLLang eLang) { - if(eLang >= LANG_150) - { - return 1; - } - return 0; + if (eLang >= LANG_150) + { + return 1; + } + return 0; } static int HaveAtomicCounter(const GLLang eLang) { - if(eLang >= LANG_420 || eLang == LANG_ES_310) - { - return 1; - } - return 0; + if (eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; } static int HaveAtomicMem(const GLLang eLang) { - if (eLang >= LANG_430 || eLang == LANG_ES_310) - { - return 1; - } - return 0; + if (eLang >= LANG_430 || eLang == LANG_ES_310) + { + return 1; + } + return 0; } static int 
HaveImageAtomics(const GLLang eLang) { - if (eLang >= LANG_420) - { - return 1; - } - return 0; + if (eLang >= LANG_420) + { + return 1; + } + return 0; } static int HaveCompute(const GLLang eLang) { - if(eLang >= LANG_430 || eLang == LANG_ES_310) - { - return 1; - } - return 0; + if (eLang >= LANG_430 || eLang == LANG_ES_310) + { + return 1; + } + return 0; } static int HaveImageLoadStore(const GLLang eLang) { - if(eLang >= LANG_420 || eLang == LANG_ES_310) - { - return 1; - } - return 0; + if (eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; } #endif diff --git a/src/internal_includes/reflect.h b/src/internal_includes/reflect.h index e7c801d..ddc468c 100644 --- a/src/internal_includes/reflect.h +++ b/src/internal_includes/reflect.h @@ -13,9 +13,9 @@ typedef struct uint32_t* pui32Interfaces; uint32_t* pui32Inputs11; uint32_t* pui32Outputs11; - uint32_t* pui32OutputsWithStreams; - uint32_t* pui32PatchConstants; - uint32_t* pui32PatchConstants11; + uint32_t* pui32OutputsWithStreams; + uint32_t* pui32PatchConstants; + uint32_t* pui32PatchConstants11; } ReflectionChunks; void LoadShaderInfo(const uint32_t ui32MajorVersion, @@ -24,4 +24,3 @@ void LoadShaderInfo(const uint32_t ui32MajorVersion, ShaderInfo* psInfo, uint32_t decodeFlags); #endif - diff --git a/src/internal_includes/toGLSL.h b/src/internal_includes/toGLSL.h index 87ffdf7..c14337a 100644 --- a/src/internal_includes/toGLSL.h +++ b/src/internal_includes/toGLSL.h @@ -8,121 +8,122 @@ class HLSLCrossCompilerContext; class ToGLSL : public Translator { protected: - GLLang language; + GLLang language; bool m_NeedUnityInstancingArraySizeDecl; public: - explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_NeedUnityInstancingArraySizeDecl(false), m_NumDeclaredWhileTrueLoops(0) {} - // Sets the target language according to given input. 
if LANG_DEFAULT, does autodetect and returns the selected language - GLLang SetLanguage(GLLang suggestedLanguage); + explicit ToGLSL(HLSLCrossCompilerContext *ctx) : Translator(ctx), language(LANG_DEFAULT), m_NeedUnityInstancingArraySizeDecl(false), m_NumDeclaredWhileTrueLoops(0) {} + // Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language + GLLang SetLanguage(GLLang suggestedLanguage); - virtual bool Translate(); - virtual void TranslateDeclaration(const Declaration* psDecl); - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); - virtual void SetIOPrefixes(); + virtual bool Translate(); + virtual void TranslateDeclaration(const Declaration* psDecl); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); + virtual void SetIOPrefixes(); private: - // Vulkan-only: detect which branches only depend on uniforms and immediate values and can be turned into specialization constants. - void IdentifyStaticBranches(ShaderPhase *psPhase); - // May return false when we detect too complex stuff (matrices, arrays etc) - bool BuildStaticBranchNameForInstruction(Instruction &inst); + // Vulkan-only: detect which branches only depend on uniforms and immediate values and can be turned into specialization constants. 
+ void IdentifyStaticBranches(ShaderPhase *psPhase); + // May return false when we detect too complex stuff (matrices, arrays etc) + bool BuildStaticBranchNameForInstruction(Instruction &inst); - void DeclareSpecializationConstants(ShaderPhase &phase); + void DeclareSpecializationConstants(ShaderPhase &phase); - void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); - void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); - void TranslateInstruction(Instruction* psInst, bool isEmbedded = false); + void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + void TranslateInstruction(Instruction* psInst, bool isEmbedded = false); - void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); - void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); + void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); + void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); - void TranslateOperandIndex(const Operand* psOperand, int index); - void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add); + void TranslateOperandIndex(const Operand* psOperand, int index); + void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add); - void AddOpAssignToDestWithMask(const 
Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); - void AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); - void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); + void AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); + void AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); + void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); - void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName); - void AddBuiltinInput(const Declaration* psDecl, const char* builtinName); - void HandleOutputRedirect(const Declaration *psDecl, const char *Precision); - void HandleInputRedirect(const Declaration *psDecl, const char *Precision); + void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName); + void AddBuiltinInput(const Declaration* psDecl, const char* builtinName); + void HandleOutputRedirect(const Declaration *psDecl, const char *Precision); + void HandleInputRedirect(const Declaration *psDecl, const char *Precision); - void AddUserOutput(const Declaration* psDecl); - void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl); + void AddUserOutput(const Declaration* psDecl); + void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl); void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix = false); void PreDeclareStructType(const 
std::string &name, const struct ShaderVarType* psType); void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl); - typedef enum - { - CMP_EQ, - CMP_LT, - CMP_GE, - CMP_NE, - } ComparisonType; - - void AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag); - - void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false); - void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); - void CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false); - void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType); - void CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); - void CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask); - void CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask); - void TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); - void TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand); - void GetResInfoData(Instruction* psInst, int index, int destElem); - void TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags); - void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, - const Operand* 
psByteAddr, uint32_t offset, uint32_t mask); - void TranslateShaderStorageStore(Instruction* psInst); - void TranslateShaderStorageLoad(Instruction* psInst); - void TranslateAtomicMemOp(Instruction* psInst); - void TranslateConditional( - Instruction* psInst, - bstring glsl); - - // Add an extra function to the m_FunctionDefinitions list, unless it's already there. - bool DeclareExtraFunction(const std::string &name, bstring body); - void UseExtraFunctionDependency(const std::string &name); - - void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType); - void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements); - - bool RenderTargetDeclared(uint32_t input); - - std::string GetVulkanDummySamplerName(); - - // A map of extra helper functions we'll need. - FunctionDefinitions m_FunctionDefinitions; - - std::set m_DeclaredRenderTarget; - int m_NumDeclaredWhileTrueLoops; + typedef enum + { + CMP_EQ, + CMP_LT, + CMP_GE, + CMP_NE, + } ComparisonType; + + void AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag); + + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded = false); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); + void CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false); + void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); + void CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int 
paramsShouldFollowWriteMask); + void CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask); + void CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask); + void TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand); + void GetResInfoData(Instruction* psInst, int index, int destElem); + void TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags); + void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, + const Operand* psByteAddr, uint32_t offset, uint32_t mask); + void TranslateShaderStorageStore(Instruction* psInst); + void TranslateShaderStorageLoad(Instruction* psInst); + void TranslateAtomicMemOp(Instruction* psInst); + void TranslateConditional( + Instruction* psInst, + bstring glsl); + + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. + bool DeclareExtraFunction(const std::string &name, bstring body); + void UseExtraFunctionDependency(const std::string &name); + + void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType); + void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements); + + bool RenderTargetDeclared(uint32_t input); + + std::string GetVulkanDummySamplerName(); + + // A map of extra helper functions we'll need. 
+ FunctionDefinitions m_FunctionDefinitions; + std::vector m_FunctionDefinitionsOrder; + + std::set m_DeclaredRenderTarget; + int m_NumDeclaredWhileTrueLoops; }; diff --git a/src/internal_includes/toMetal.h b/src/internal_includes/toMetal.h index b24ae01..d3beed6 100644 --- a/src/internal_includes/toMetal.h +++ b/src/internal_includes/toMetal.h @@ -1,4 +1,3 @@ - #pragma once #include "internal_includes/Translator.h" #include @@ -6,174 +5,172 @@ struct SamplerDesc { - std::string name; - uint32_t reg, slot; + std::string name; + uint32_t reg, slot; }; struct TextureSamplerDesc { - std::string name; - int textureBind, samplerBind; - HLSLCC_TEX_DIMENSION dim; - bool isMultisampled; - bool isDepthSampler; - bool uav; + std::string name; + int textureBind, samplerBind; + HLSLCC_TEX_DIMENSION dim; + bool isMultisampled; + bool isDepthSampler; + bool uav; }; class ToMetal : public Translator { protected: - GLLang language; + GLLang language; public: - explicit ToMetal(HLSLCrossCompilerContext *ctx) + explicit ToMetal(HLSLCrossCompilerContext *ctx) : Translator(ctx) , m_ShadowSamplerDeclared(false) , m_NeedFBOutputRemapDecl(false) , m_NeedFBInputRemapDecl(false) {} - virtual bool Translate(); - virtual void TranslateDeclaration(const Declaration *psDecl); - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); - std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + virtual bool Translate(); + virtual void TranslateDeclaration(const Declaration *psDecl); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); + std::string 
TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); - virtual void SetIOPrefixes(); + virtual void SetIOPrefixes(); private: - void TranslateInstruction(Instruction* psInst); - - void DeclareBuiltinInput(const Declaration *psDecl); - void DeclareBuiltinOutput(const Declaration *psDecl); - void DeclareClipPlanes(const Declaration* decl, unsigned declCount); - void GenerateTexturesReflection(HLSLccReflection* refl); - - // Retrieve the name of the output struct for this shader - std::string GetOutputStructName() const; - std::string GetInputStructName() const; - std::string GetCBName(const std::string& cbName) const; - - void DeclareHullShaderPassthrough(); - void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName); - void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName); - - void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint); - void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false); - void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0); - void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0); - void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0); - void DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV); - - void DeclareResource(const Declaration *psDecl); - void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim); - - void DeclareOutput(const Declaration *decl); - - void PrintStructDeclarations(StructDefinitions &defs, const char *name = ""); - - std::string ResourceName(ResourceGroup group, const 
uint32_t ui32RegisterNumber); - - // ToMetalOperand.cpp - std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true); - std::string TranslateOperandIndex(const Operand* psOperand, int index); - std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); - - // ToMetalInstruction.cpp - - void AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); - void AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); - void AddAssignPrologue(int numParenthesis); - - typedef enum - { - CMP_EQ, - CMP_LT, - CMP_GE, - CMP_NE, - } ComparisonType; - - void AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag); - - bool CanForceToHalfOperand(const Operand *psOperand); - - void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc); - void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); - void CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); - void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType); - void CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags); - void CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); - void CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int 
paramsShouldFollowWriteMask); - void CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask); - void CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask); - void TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); - void TranslateTexelFetchOffset( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); - void TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand); - void GetResInfoData(Instruction* psInst, int index, int destElem); - void TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags); - void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, - const Operand* psByteAddr, uint32_t offset, uint32_t mask); - void TranslateShaderStorageStore(Instruction* psInst); - void TranslateShaderStorageLoad(Instruction* psInst); - void TranslateAtomicMemOp(Instruction* psInst); - void TranslateConditional( - Instruction* psInst, - bstring glsl); - - // The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters - StructDefinitions m_StructDefinitions; - - // A map of extra helper functions we'll need. 
- FunctionDefinitions m_FunctionDefinitions; - - BindingSlotAllocator m_TextureSlots, m_SamplerSlots; - BindingSlotAllocator m_BufferSlots; - - std::vector m_Samplers; - std::vector m_Textures; - - std::string m_ExtraGlobalDefinitions; + void TranslateInstruction(Instruction* psInst); + + void DeclareBuiltinInput(const Declaration *psDecl); + void DeclareBuiltinOutput(const Declaration *psDecl); + void DeclareClipPlanes(const Declaration* decl, unsigned declCount); + void GenerateTexturesReflection(HLSLccReflection* refl); + + // Retrieve the name of the output struct for this shader + std::string GetOutputStructName() const; + std::string GetInputStructName() const; + std::string GetCBName(const std::string& cbName) const; + + void DeclareHullShaderPassthrough(); + void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName); + void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName); + + void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint); + void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false); + void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0); + void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true); + void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true); + void DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV); + + void DeclareResource(const Declaration *psDecl); + void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim); + + void DeclareOutput(const Declaration *decl); + + void PrintStructDeclarations(StructDefinitions 
&defs, const char *name = ""); + + std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber); + + // ToMetalOperand.cpp + std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true); + std::string TranslateOperandIndex(const Operand* psOperand, int index); + std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); + + // ToMetalInstruction.cpp + + void AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask); + void AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis); + void AddAssignPrologue(int numParenthesis); + + typedef enum + { + CMP_EQ, + CMP_LT, + CMP_GE, + CMP_NE, + } ComparisonType; + + void AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag); + + bool CanForceToHalfOperand(const Operand *psOperand); + + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2); + void CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); + void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); + void CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2Int(const char* 
name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask); + void CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask); + void TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand); + void GetResInfoData(Instruction* psInst, int index, int destElem); + void TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags); + void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, + const Operand* psByteAddr, uint32_t offset, uint32_t mask); + void TranslateShaderStorageStore(Instruction* psInst); + void TranslateShaderStorageLoad(Instruction* psInst); + void TranslateAtomicMemOp(Instruction* psInst); + void TranslateConditional( + Instruction* psInst, + bstring glsl); + + // The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters + StructDefinitions m_StructDefinitions; + + // A map of extra helper functions we'll need. 
+ FunctionDefinitions m_FunctionDefinitions; + + BindingSlotAllocator m_TextureSlots, m_SamplerSlots; + BindingSlotAllocator m_BufferSlots; + + std::vector m_Samplers; + std::vector m_Textures; + + std::string m_ExtraGlobalDefinitions; // Flags for whether we need to add the declaration for the FB IO remaps bool m_NeedFBInputRemapDecl; bool m_NeedFBOutputRemapDecl; - - bool m_ShadowSamplerDeclared; - void EnsureShadowSamplerDeclared(); + bool m_ShadowSamplerDeclared; - // Add an extra function to the m_FunctionDefinitions list, unless it's already there. - void DeclareExtraFunction(const std::string &name, const std::string &body); + void EnsureShadowSamplerDeclared(); - // Move all lowp -> mediump - void ClampPartialPrecisions(); - - // Reseve UAV slots in advance to match the original HLSL bindings -> correct bindings in SetRandomWriteTarget() - void ReserveUAVBindingSlots(ShaderPhase *phase); -}; + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. 
+ void DeclareExtraFunction(const std::string &name, const std::string &body); + // Move all lowp -> mediump + void ClampPartialPrecisions(); + // Reseve UAV slots in advance to match the original HLSL bindings -> correct bindings in SetRandomWriteTarget() + void ReserveUAVBindingSlots(ShaderPhase *phase); +}; diff --git a/src/internal_includes/toMetalDeclaration.h b/src/internal_includes/toMetalDeclaration.h index f51f48c..2052009 100644 --- a/src/internal_includes/toMetalDeclaration.h +++ b/src/internal_includes/toMetalDeclaration.h @@ -1,3 +1,3 @@ #pragma once -#include "internal_includes/Declaration.h" \ No newline at end of file +#include "internal_includes/Declaration.h" diff --git a/src/internal_includes/tokens.h b/src/internal_includes/tokens.h index 6c24535..912234b 100644 --- a/src/internal_includes/tokens.h +++ b/src/internal_includes/tokens.h @@ -5,17 +5,17 @@ enum SHADER_PHASE_TYPE { - SHADER_PHASE_INVALID = -1, - MAIN_PHASE = 0, - HS_GLOBAL_DECL_PHASE = 1, - HS_CTRL_POINT_PHASE = 2, - HS_FORK_PHASE = 3, - HS_JOIN_PHASE = 4 + SHADER_PHASE_INVALID = -1, + MAIN_PHASE = 0, + HS_GLOBAL_DECL_PHASE = 1, + HS_CTRL_POINT_PHASE = 2, + HS_FORK_PHASE = 3, + HS_JOIN_PHASE = 4 }; static SHADER_TYPE DecodeShaderType(uint32_t ui32Token) { - return (SHADER_TYPE)((ui32Token & 0xffff0000) >> 16); + return (SHADER_TYPE)((ui32Token & 0xffff0000) >> 16); } static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token) @@ -51,25 +51,24 @@ static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token) return (EXTENDED_OPCODE_TYPE)(ui32Token & 0x0000003f); } - static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) { - return (RESOURCE_RETURN_TYPE)((ui32Token>>(ui32Coord * 4))&0xF); + return (RESOURCE_RETURN_TYPE)((ui32Token >> (ui32Coord * 4)) & 0xF); } static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) { - return (RESOURCE_RETURN_TYPE)((ui32Token>>(ui32Coord * 4 + 6))&0xF); + 
return (RESOURCE_RETURN_TYPE)((ui32Token >> (ui32Coord * 4 + 6)) & 0xF); } enum OPCODE_TYPE { //For DX9 - OPCODE_POW = -6, - OPCODE_DP2ADD = -5, - OPCODE_LRP = -4, - OPCODE_ENDREP = -3, - OPCODE_REP = -2, + OPCODE_POW = -6, + OPCODE_DP2ADD = -5, + OPCODE_LRP = -4, + OPCODE_ENDREP = -3, + OPCODE_REP = -2, OPCODE_SPECIAL_DCL_IMMCONST = -1, OPCODE_ADD, @@ -183,7 +182,7 @@ enum OPCODE_TYPE // ----------------------------------------------- OPCODE_RESERVED_10, - + // ---------- DX 10.1 op codes--------------------- OPCODE_LOD, @@ -234,7 +233,7 @@ enum OPCODE_TYPE OPCODE_DCL_FUNCTION_BODY, OPCODE_DCL_FUNCTION_TABLE, OPCODE_DCL_INTERFACE, - + OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, OPCODE_DCL_TESS_DOMAIN, @@ -278,9 +277,9 @@ enum OPCODE_TYPE OPCODE_IMM_ATOMIC_IMAX, OPCODE_IMM_ATOMIC_IMIN, OPCODE_IMM_ATOMIC_UMAX, - OPCODE_IMM_ATOMIC_UMIN, + OPCODE_IMM_ATOMIC_UMIN, OPCODE_SYNC, - + OPCODE_DADD, OPCODE_DMAX, OPCODE_DMIN, @@ -297,7 +296,7 @@ enum OPCODE_TYPE OPCODE_EVAL_SNAPPED, OPCODE_EVAL_SAMPLE_INDEX, OPCODE_EVAL_CENTROID, - + OPCODE_DCL_GS_INSTANCE_COUNT, OPCODE_ABORT, @@ -343,14 +342,14 @@ typedef enum static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token) { - return (OPERAND_INDEX_DIMENSION)((ui32Token & 0x00300000) >> 20); + return (OPERAND_INDEX_DIMENSION)((ui32Token & 0x00300000) >> 20); } typedef enum OPERAND_TYPE { OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10, - OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9, - OPERAND_TYPE_SPECIAL_TEXCOORD = -8, + OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9, + OPERAND_TYPE_SPECIAL_TEXCOORD = -8, OPERAND_TYPE_SPECIAL_POSITION = -7, OPERAND_TYPE_SPECIAL_FOG = -6, OPERAND_TYPE_SPECIAL_POINTSIZE = -5, @@ -363,25 +362,25 @@ typedef enum OPERAND_TYPE OPERAND_TYPE_OUTPUT = 2, // General Output Register File OPERAND_TYPE_INDEXABLE_TEMP = 3, // Temporary Register File (indexable) OPERAND_TYPE_IMMEDIATE32 = 4, // 32bit/component immediate value(s) - // If for example, operand token bits - // 
[01:00]==OPERAND_4_COMPONENT, - // this means that the operand type: - // OPERAND_TYPE_IMMEDIATE32 - // results in 4 additional 32bit - // DWORDS present for the operand. + // If for example, operand token bits + // [01:00]==OPERAND_4_COMPONENT, + // this means that the operand type: + // OPERAND_TYPE_IMMEDIATE32 + // results in 4 additional 32bit + // DWORDS present for the operand. OPERAND_TYPE_IMMEDIATE64 = 5, // 64bit/comp.imm.val(s)HI:LO OPERAND_TYPE_SAMPLER = 6, // Reference to sampler state OPERAND_TYPE_RESOURCE = 7, // Reference to memory resource (e.g. texture) - OPERAND_TYPE_CONSTANT_BUFFER= 8, // Reference to constant buffer - OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER= 9, // Reference to immediate constant buffer + OPERAND_TYPE_CONSTANT_BUFFER = 8, // Reference to constant buffer + OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, // Reference to immediate constant buffer OPERAND_TYPE_LABEL = 10, // Label OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID OPERAND_TYPE_OUTPUT_DEPTH = 12, // Output Depth OPERAND_TYPE_NULL = 13, // Null register, used to discard results of operations - // Below Are operands new in DX 10.1 + // Below Are operands new in DX 10.1 OPERAND_TYPE_RASTERIZER = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar) - // Below Are operands new in DX 11 + // Below Are operands new in DX 11 OPERAND_TYPE_STREAM = 16, // Reference to GS stream output resource OPERAND_TYPE_FUNCTION_BODY = 17, // Reference to a function definition OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class @@ -411,30 +410,30 @@ typedef enum OPERAND_TYPE static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token) { - return (OPERAND_TYPE)((ui32Token & 0x000ff000) >> 12); + return (OPERAND_TYPE)((ui32Token & 0x000ff000) >> 12); } static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token) { - return 
(SPECIAL_NAME)(ui32Token & 0x0000ffff); + return (SPECIAL_NAME)(ui32Token & 0x0000ffff); } typedef enum OPERAND_INDEX_REPRESENTATION { OPERAND_INDEX_IMMEDIATE32 = 0, // Extra DWORD OPERAND_INDEX_IMMEDIATE64 = 1, // 2 Extra DWORDs - // (HI32:LO32) + // (HI32:LO32) OPERAND_INDEX_RELATIVE = 2, // Extra operand OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by - // extra operand + // extra operand OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS - // (HI32:LO32) followed - // by extra operand + // (HI32:LO32) followed + // by extra operand } OPERAND_INDEX_REPRESENTATION; static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token) { - return (OPERAND_INDEX_REPRESENTATION)((ui32Token & (0x3<<(22+3*((ui32Dimension)&3)))) >> (22+3*((ui32Dimension)&3))); + return (OPERAND_INDEX_REPRESENTATION)((ui32Token & (0x3 << (22 + 3 * ((ui32Dimension) & 3)))) >> (22 + 3 * ((ui32Dimension) & 3))); } typedef enum OPERAND_NUM_COMPONENTS @@ -447,7 +446,7 @@ typedef enum OPERAND_NUM_COMPONENTS static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token) { - return (OPERAND_NUM_COMPONENTS)(ui32Token & 0x00000003); + return (OPERAND_NUM_COMPONENTS)(ui32Token & 0x00000003); } typedef enum OPERAND_4_COMPONENT_SELECTION_MODE @@ -459,7 +458,7 @@ typedef enum OPERAND_4_COMPONENT_SELECTION_MODE static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token) { - return (OPERAND_4_COMPONENT_SELECTION_MODE)((ui32Token & 0x0000000c) >> 2); + return (OPERAND_4_COMPONENT_SELECTION_MODE)((ui32Token & 0x0000000c) >> 2); } #define OPERAND_4_COMPONENT_MASK_X 0x00000001 @@ -474,17 +473,17 @@ static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui3 static uint32_t DecodeOperand4CompMask(uint32_t ui32Token) { - return (uint32_t)((ui32Token & 0x000000f0) >> 4); + return (uint32_t)((ui32Token & 0x000000f0) >> 4); } static uint32_t 
DecodeOperand4CompSwizzle(uint32_t ui32Token) { - return (uint32_t)((ui32Token & 0x00000ff0) >> 4); + return (uint32_t)((ui32Token & 0x00000ff0) >> 4); } static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token) { - return (uint32_t)((ui32Token & 0x00000030) >> 4); + return (uint32_t)((ui32Token & 0x00000030) >> 4); } #define OPERAND_4_COMPONENT_X 0 @@ -492,7 +491,7 @@ static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token) #define OPERAND_4_COMPONENT_Z 2 #define OPERAND_4_COMPONENT_W 3 -static const uint32_t NO_SWIZZLE = (( (OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y<<2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6))/*<<4*/); +static const uint32_t NO_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6)) /*<<4*/); static const uint32_t XXXX_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_X << 2) | (OPERAND_4_COMPONENT_X << 4) | (OPERAND_4_COMPONENT_X << 6))); static const uint32_t YYYY_SWIZZLE = (((OPERAND_4_COMPONENT_Y) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Y << 4) | (OPERAND_4_COMPONENT_Y << 6))); @@ -501,7 +500,7 @@ static const uint32_t WWWW_SWIZZLE = (((OPERAND_4_COMPONENT_W) | (OPERAND_4_COMP static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp) { - return (uint32_t)(((ui32Token)>>(4+2*((comp)&3)))&3); + return (uint32_t)(((ui32Token) >> (4 + 2 * ((comp) & 3))) & 3); } typedef enum RESOURCE_DIMENSION @@ -523,12 +522,12 @@ typedef enum RESOURCE_DIMENSION static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token) { - return (RESOURCE_DIMENSION)((ui32Token & 0x0000f800) >> 11); + return (RESOURCE_DIMENSION)((ui32Token & 0x0000f800) >> 11); } static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token) { - return (RESOURCE_DIMENSION)((ui32Token & 0x000007C0) >> 6); + return (RESOURCE_DIMENSION)((ui32Token & 0x000007C0) >> 6); } typedef enum INSTRUCTION_TEST_BOOLEAN @@ -539,7 
+538,7 @@ typedef enum INSTRUCTION_TEST_BOOLEAN static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token) { - return (INSTRUCTION_TEST_BOOLEAN)((ui32Token & 0x00040000) >> 18); + return (INSTRUCTION_TEST_BOOLEAN)((ui32Token & 0x00040000) >> 18); } static uint32_t DecodeIsOperandExtended(uint32_t ui32Token) @@ -555,7 +554,7 @@ typedef enum EXTENDED_OPERAND_TYPE static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token) { - return (EXTENDED_OPERAND_TYPE)(ui32Token & 0x0000003f); + return (EXTENDED_OPERAND_TYPE)(ui32Token & 0x0000003f); } typedef enum OPERAND_MODIFIER @@ -568,29 +567,28 @@ typedef enum OPERAND_MODIFIER static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token) { - return (OPERAND_MODIFIER)((ui32Token & 0x00003fc0) >> 6); + return (OPERAND_MODIFIER)((ui32Token & 0x00003fc0) >> 6); } -static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED = (1<<11); -static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = (1<<12); -static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL = (1<<13); -static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = (1<<14); -static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION = (1<<15); -static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION = (1<<16); -static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS = (1<<17); -static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS = (1<<18); +static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED = (1 << 11); +static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = (1 << 12); +static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL = (1 << 13); +static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = (1 << 14); +static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION = (1 << 15); +static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION = (1 << 16); +static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS = (1 << 17); +static const uint32_t 
GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS = (1 << 18); static uint32_t DecodeGlobalFlags(uint32_t ui32Token) { - return (uint32_t)(ui32Token & 0x00fff800); + return (uint32_t)(ui32Token & 0x00fff800); } static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token) { - return (INTERPOLATION_MODE)((ui32Token & 0x00007800) >> 11); + return (INTERPOLATION_MODE)((ui32Token & 0x00007800) >> 11); } - typedef enum PRIMITIVE_TOPOLOGY { PRIMITIVE_TOPOLOGY_UNDEFINED = 0, @@ -609,7 +607,7 @@ typedef enum PRIMITIVE_TOPOLOGY static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token) { - return (PRIMITIVE_TOPOLOGY)((ui32Token & 0x0001f800) >> 11); + return (PRIMITIVE_TOPOLOGY)((ui32Token & 0x0001f800) >> 11); } typedef enum PRIMITIVE @@ -657,22 +655,22 @@ typedef enum PRIMITIVE static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token) { - return (PRIMITIVE)((ui32Token & 0x0001f800) >> 11); + return (PRIMITIVE)((ui32Token & 0x0001f800) >> 11); } static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token) { - return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11); + return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11); } static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token) { - return (TESSELLATOR_DOMAIN)((ui32Token & 0x00001800) >> 11); + return (TESSELLATOR_DOMAIN)((ui32Token & 0x00001800) >> 11); } static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token) { - return (TESSELLATOR_OUTPUT_PRIMITIVE)((ui32Token & 0x00003800) >> 11); + return (TESSELLATOR_OUTPUT_PRIMITIVE)((ui32Token & 0x00003800) >> 11); } static const uint32_t SYNC_THREADS_IN_GROUP = 0x00000800; @@ -682,19 +680,19 @@ static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 0x00004000; static uint32_t DecodeSyncFlags(uint32_t ui32Token) { - return ui32Token & 0x00007800; + return ui32Token & 0x00007800; } // The number of types that implement this interface static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token) { 
- return (uint32_t)((ui32Token & 0x0000ffff) >> 0); + return (uint32_t)((ui32Token & 0x0000ffff) >> 0); } // The number of interfaces that are defined in this array. static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token) { - return (uint32_t)((ui32Token & 0xffff0000) >> 16); + return (uint32_t)((ui32Token & 0xffff0000) >> 16); } typedef enum CUSTOMDATA_CLASS @@ -708,7 +706,7 @@ typedef enum CUSTOMDATA_CLASS static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token) { - return (CUSTOMDATA_CLASS)((ui32Token & 0xfffff800) >> 11); + return (CUSTOMDATA_CLASS)((ui32Token & 0xfffff800) >> 11); } static uint32_t DecodeInstructionSaturate(uint32_t ui32Token) @@ -718,8 +716,8 @@ static uint32_t DecodeInstructionSaturate(uint32_t ui32Token) typedef enum OPERAND_MIN_PRECISION { - OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision - // for the shader model + OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision + // for the shader model OPERAND_MIN_PRECISION_FLOAT_16 = 1, // Min 16 bit/component float OPERAND_MIN_PRECISION_FLOAT_2_8 = 2, // Min 10(2.8)bit/comp. float OPERAND_MIN_PRECISION_SINT_16 = 4, // Min 16 bit/comp. 
signed integer @@ -733,7 +731,7 @@ static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token) static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token) { - return ((ui32Token & 0x0001f800) >> 11); + return ((ui32Token & 0x0001f800) >> 11); } typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD @@ -749,7 +747,7 @@ typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD static uint32_t DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_COORD eCoord, uint32_t ui32Token) { - return ((((ui32Token)&IMMEDIATE_ADDRESS_OFFSET_MASK(eCoord))>>(IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord)))); + return ((((ui32Token) & IMMEDIATE_ADDRESS_OFFSET_MASK(eCoord)) >> (IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord)))); } // UAV access scope flags @@ -759,7 +757,6 @@ static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token) return ui32Token & 0x00010000; } - typedef enum RESINFO_RETURN_TYPE { RESINFO_INSTRUCTION_RETURN_FLOAT = 0, @@ -774,15 +771,14 @@ static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token) typedef enum SB_SAMPLER_MODE { - D3D10_SB_SAMPLER_MODE_DEFAULT = 0, - D3D10_SB_SAMPLER_MODE_COMPARISON = 1, - D3D10_SB_SAMPLER_MODE_MONO = 2, + D3D10_SB_SAMPLER_MODE_DEFAULT = 0, + D3D10_SB_SAMPLER_MODE_COMPARISON = 1, + D3D10_SB_SAMPLER_MODE_MONO = 2, } SB_SAMPLER_MODE; static SB_SAMPLER_MODE DecodeSamplerMode(uint32_t ui32Token) { - return (SB_SAMPLER_MODE)((ui32Token & 0x00001800) >> 11); + return (SB_SAMPLER_MODE)((ui32Token & 0x00001800) >> 11); } - #endif diff --git a/src/reflect.cpp b/src/reflect.cpp index eef15ce..dca2004 100644 --- a/src/reflect.cpp +++ b/src/reflect.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/reflect.h" #include "internal_includes/debug.h" #include "internal_includes/decode.h" @@ -18,11 +17,11 @@ static void FormatVariableName(std::string & Name) /* Leave $ThisPointer, $Element and $Globals as-is. Otherwise remove $ character ($ is not a valid character for GLSL variable names). 
*/ - if(Name[0] == '$') + if (Name[0] == '$') { - if(strcmp(Name.c_str(), "$Element") !=0 && - strcmp(Name.c_str(), "$Globals") != 0 && - strcmp(Name.c_str(), "$ThisPointer") != 0) + if (strcmp(Name.c_str(), "$Element") != 0 && + strcmp(Name.c_str(), "$Globals") != 0 && + strcmp(Name.c_str(), "$ThisPointer") != 0) { Name[0] = '_'; } @@ -31,25 +30,25 @@ static void FormatVariableName(std::string & Name) static std::string ReadStringFromTokenStream(const uint32_t* tokens) { - char* charTokens = (char*) tokens; - return std::string(charTokens); + char* charTokens = (char*)tokens; + return std::string(charTokens); } static int MaskToRebaseOffset(const uint32_t mask) { - int res = 0; - uint32_t m = mask; - while ((m & 1) == 0) - { - res++; - m = m >> 1; - } - return res; + int res = 0; + uint32_t m = mask; + while ((m & 1) == 0) + { + res++; + m = m >> 1; + } + return res; } static void ReadInputSignatures(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo, - const int extended) + ShaderInfo* psShaderInfo, + const int extended) { uint32_t i; @@ -57,44 +56,44 @@ static void ReadInputSignatures(const uint32_t* pui32Tokens, const uint32_t ui32ElementCount = *pui32Tokens++; /* const uint32_t ui32Key = * */ pui32Tokens++; - psShaderInfo->psInputSignatures.clear(); - psShaderInfo->psInputSignatures.resize(ui32ElementCount); + psShaderInfo->psInputSignatures.clear(); + psShaderInfo->psInputSignatures.resize(ui32ElementCount); - for(i=0; ipsInputSignatures[i]; uint32_t ui32SemanticNameOffset; - psCurrentSignature->ui32Stream = 0; - psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; - if(extended) - psCurrentSignature->ui32Stream = *pui32Tokens++; + if (extended) + psCurrentSignature->ui32Stream = *pui32Tokens++; - ui32SemanticNameOffset = *pui32Tokens++; + ui32SemanticNameOffset = *pui32Tokens++; psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; - 
psCurrentSignature->eSystemValueType = (SPECIAL_NAME) *pui32Tokens++; - psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; psCurrentSignature->ui32Register = *pui32Tokens++; - + ui32ComponentMasks = *pui32Tokens++; psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; //Shows which components are read psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; - psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); - if(extended) - psCurrentSignature->eMinPrec = (MIN_PRECISION) *pui32Tokens++; + if (extended) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; - psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken+ui32SemanticNameOffset)); + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); } } static void ReadOutputSignatures(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo, - const int minPrec, - const int streams) + ShaderInfo* psShaderInfo, + const int minPrec, + const int streams) { uint32_t i; @@ -102,50 +101,50 @@ static void ReadOutputSignatures(const uint32_t* pui32Tokens, const uint32_t ui32ElementCount = *pui32Tokens++; /*const uint32_t ui32Key = * */ pui32Tokens++; - psShaderInfo->psOutputSignatures.clear(); - psShaderInfo->psOutputSignatures.resize(ui32ElementCount); + psShaderInfo->psOutputSignatures.clear(); + psShaderInfo->psOutputSignatures.resize(ui32ElementCount); - for(i=0; ipsOutputSignatures[i]; uint32_t ui32SemanticNameOffset; - psCurrentSignature->ui32Stream = 0; - psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + psCurrentSignature->ui32Stream = 0; + 
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; - if(streams) - psCurrentSignature->ui32Stream = *pui32Tokens++; + if (streams) + psCurrentSignature->ui32Stream = *pui32Tokens++; - ui32SemanticNameOffset = *pui32Tokens++; + ui32SemanticNameOffset = *pui32Tokens++; psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; - psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; psCurrentSignature->ui32Register = *pui32Tokens++; - // Massage some special inputs/outputs to match the types of GLSL counterparts - if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) - { - psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; - } + // Massage some special inputs/outputs to match the types of GLSL counterparts + if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) + { + psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; + } ui32ComponentMasks = *pui32Tokens++; psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; //Shows which components are NEVER written. 
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; - psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); - if(minPrec) - psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + if (minPrec) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; - psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); } } static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo, - const int minPrec, - const int streams) + ShaderInfo* psShaderInfo, + const int minPrec, + const int streams) { uint32_t i; @@ -153,94 +152,101 @@ static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens, const uint32_t ui32ElementCount = *pui32Tokens++; /*const uint32_t ui32Key = * */ pui32Tokens++; - psShaderInfo->psPatchConstantSignatures.clear(); - psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount); + psShaderInfo->psPatchConstantSignatures.clear(); + psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount); - for(i=0; ipsPatchConstantSignatures[i]; + ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psPatchConstantSignatures[i]; uint32_t ui32SemanticNameOffset; - psCurrentSignature->ui32Stream = 0; - psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; - if(streams) - psCurrentSignature->ui32Stream = *pui32Tokens++; + if (streams) + psCurrentSignature->ui32Stream = *pui32Tokens++; - ui32SemanticNameOffset = *pui32Tokens++; + ui32SemanticNameOffset = *pui32Tokens++; psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; 
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; - psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE) *pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; psCurrentSignature->ui32Register = *pui32Tokens++; - // Massage some special inputs/outputs to match the types of GLSL counterparts - if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) - { - psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; - } + // Massage some special inputs/outputs to match the types of GLSL counterparts + if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) + { + psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; + } ui32ComponentMasks = *pui32Tokens++; psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; //Shows which components are NEVER written. psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; - psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); - if(minPrec) - psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + if (minPrec) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; - psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); } } -static const uint32_t* ReadResourceBinding(const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags) +static const uint32_t* ReadResourceBinding(ShaderInfo* psShaderInfo, const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags) { uint32_t ui32NameOffset = *pui32Tokens++; - psBinding->name = 
ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken+ui32NameOffset)); + psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken + ui32NameOffset)); FormatVariableName(psBinding->name); - psBinding->eType = (ResourceType)*pui32Tokens++; + psBinding->eType = (ResourceType) * pui32Tokens++; psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++; psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++; - psBinding->ui32NumSamples = *pui32Tokens++; + psBinding->ui32NumSamples = *pui32Tokens++; // fxc generates 2^32 - 1 for non MS images psBinding->ui32BindPoint = *pui32Tokens++; psBinding->ui32BindCount = *pui32Tokens++; psBinding->ui32Flags = *pui32Tokens++; - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN; - - if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME) - { - if (psBinding->name.rfind("_highp") == psBinding->name.length() - 6) - { - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP; - psBinding->name.resize(psBinding->name.length() - 6); - } - else if (psBinding->name.rfind("_mediump") == psBinding->name.length() - 8) - { - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP; - psBinding->name.resize(psBinding->name.length() - 8); - } - else if (psBinding->name.rfind("_lowp") == psBinding->name.length() - 5) - { - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP; - psBinding->name.resize(psBinding->name.length() - 5); - } - } + if (((psShaderInfo->ui32MajorVersion >= 5) && (psShaderInfo->ui32MinorVersion >= 1)) || + (psShaderInfo->ui32MajorVersion > 5)) + { + psBinding->ui32Space = *pui32Tokens++; + psBinding->ui32RangeID = *pui32Tokens++; + } + + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN; + + if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME) + { + if (psBinding->name.rfind("_highp") == psBinding->name.length() - 6) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP; + 
psBinding->name.resize(psBinding->name.length() - 6); + } + else if (psBinding->name.rfind("_mediump") == psBinding->name.length() - 8) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP; + psBinding->name.resize(psBinding->name.length() - 8); + } + else if (psBinding->name.rfind("_lowp") == psBinding->name.length() - 5) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP; + psBinding->name.resize(psBinding->name.length() - 5); + } + } return pui32Tokens; } //Read D3D11_SHADER_TYPE_DESC static void ReadShaderVariableType(const uint32_t ui32MajorVersion, - const uint32_t* pui32FirstConstBufToken, - const uint32_t* pui32tokens, ShaderVarType* varType) + const uint32_t* pui32FirstConstBufToken, + const uint32_t* pui32tokens, ShaderVarType* varType) { - const uint16_t* pui16Tokens = (const uint16_t*) pui32tokens; + const uint16_t* pui16Tokens = (const uint16_t*)pui32tokens; uint16_t ui32MemberCount; uint32_t ui32MemberOffset; - const uint32_t* pui32MemberTokens; - uint32_t i; + const uint32_t* pui32MemberTokens; + uint32_t i; varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0]; varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1]; @@ -249,41 +255,41 @@ static void ReadShaderVariableType(const uint32_t ui32MajorVersion, varType->Elements = pui16Tokens[4]; varType->MemberCount = ui32MemberCount = pui16Tokens[5]; - varType->Members.clear(); - - if(varType->ParentCount) - { - // Add empty brackets for array parents. Indices are filled in later in the printing codes. - if (varType->Parent->Elements > 1) - varType->fullName = varType->Parent->fullName + "[]." + varType->name; - else - varType->fullName = varType->Parent->fullName + "." 
+ varType->name; - } - - if(ui32MemberCount) - { - varType->Members.resize(ui32MemberCount); - - ui32MemberOffset = pui32tokens[3]; - - pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberOffset); - - for(i=0; i< ui32MemberCount; ++i) - { - uint32_t ui32NameOffset = *pui32MemberTokens++; - uint32_t ui32MemberTypeOffset = *pui32MemberTokens++; - - varType->Members[i].Parent = varType; - varType->Members[i].ParentCount = varType->ParentCount + 1; - - varType->Members[i].Offset = *pui32MemberTokens++; - - varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); - - ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken, - (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32MemberTypeOffset), &varType->Members[i]); - } - } + varType->Members.clear(); + + if (varType->ParentCount) + { + // Add empty brackets for array parents. Indices are filled in later in the printing codes. + if (varType->Parent->Elements > 1) + varType->fullName = varType->Parent->fullName + "[]." + varType->name; + else + varType->fullName = varType->Parent->fullName + "." 
+ varType->name; + } + + if (ui32MemberCount) + { + varType->Members.resize(ui32MemberCount); + + ui32MemberOffset = pui32tokens[3]; + + pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberOffset); + + for (i = 0; i < ui32MemberCount; ++i) + { + uint32_t ui32NameOffset = *pui32MemberTokens++; + uint32_t ui32MemberTypeOffset = *pui32MemberTokens++; + + varType->Members[i].Parent = varType; + varType->Members[i].ParentCount = varType->ParentCount + 1; + + varType->Members[i].Offset = *pui32MemberTokens++; + + varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + + ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken, + (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberTypeOffset), &varType->Members[i]); + } + } } static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, @@ -293,15 +299,15 @@ static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, uint32_t ui32NameOffset = *pui32Tokens++; uint32_t ui32VarCount = *pui32Tokens++; uint32_t ui32VarOffset = *pui32Tokens++; - const uint32_t* pui32VarToken = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32VarOffset); + const uint32_t* pui32VarToken = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32VarOffset); - psBuffer->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + psBuffer->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); FormatVariableName(psBuffer->name); - psBuffer->asVars.clear(); - psBuffer->asVars.resize(ui32VarCount); + psBuffer->asVars.clear(); + psBuffer->asVars.resize(ui32VarCount); - for(i=0; iasVars[i]; @@ -311,66 +317,66 @@ static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, ui32NameOffset = *pui32VarToken++; - psVar->name = ReadStringFromTokenStream((const uint32_t*)((const 
char*)pui32FirstConstBufToken + ui32NameOffset)); + psVar->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); FormatVariableName(psVar->name); psVar->ui32StartOffset = *pui32VarToken++; psVar->ui32Size = *pui32VarToken++; - + //skip ui32Flags pui32VarToken++; - + ui32TypeOffset = *pui32VarToken++; - psVar->sType.name = psVar->name; - psVar->sType.fullName = psVar->name; - psVar->sType.Parent = 0; - psVar->sType.ParentCount = 0; - psVar->sType.Offset = 0; - psVar->sType.m_IsUsed = false; + psVar->sType.name = psVar->name; + psVar->sType.fullName = psVar->name; + psVar->sType.Parent = 0; + psVar->sType.ParentCount = 0; + psVar->sType.Offset = 0; + psVar->sType.m_IsUsed = false; - ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken, - (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32TypeOffset), &psVar->sType); + ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken, + (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32TypeOffset), &psVar->sType); ui32DefaultValueOffset = *pui32VarToken++; - if (psShaderInfo->ui32MajorVersion >= 5) - { - /*uint32_t StartTexture = * */pui32VarToken++; - /*uint32_t TextureSize = * */pui32VarToken++; - /*uint32_t StartSampler = * */pui32VarToken++; - /*uint32_t SamplerSize = * */pui32VarToken++; - } + if (psShaderInfo->ui32MajorVersion >= 5) + { + /*uint32_t StartTexture = * */ pui32VarToken++; + /*uint32_t TextureSize = * */ pui32VarToken++; + /*uint32_t StartSampler = * */ pui32VarToken++; + /*uint32_t SamplerSize = * */ pui32VarToken++; + } - psVar->haveDefaultValue = 0; + psVar->haveDefaultValue = 0; - if(ui32DefaultValueOffset) + if (ui32DefaultValueOffset) { - uint32_t i = 0; - const uint32_t ui32NumDefaultValues = psVar->ui32Size / 4; - const uint32_t* pui32DefaultValToken = (const uint32_t*)((const char*)pui32FirstConstBufToken+ui32DefaultValueOffset); + uint32_t i = 0; + const uint32_t 
ui32NumDefaultValues = psVar->ui32Size / 4; + const uint32_t* pui32DefaultValToken = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32DefaultValueOffset); - //Always a sequence of 4-bytes at the moment. - //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes. - ASSERT(psVar->ui32Size%4 == 0); + //Always a sequence of 4-bytes at the moment. + //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes. + ASSERT(psVar->ui32Size % 4 == 0); - psVar->haveDefaultValue = 1; + psVar->haveDefaultValue = 1; - psVar->pui32DefaultValues.clear(); - psVar->pui32DefaultValues.resize(psVar->ui32Size / 4); + psVar->pui32DefaultValues.clear(); + psVar->pui32DefaultValues.resize(psVar->ui32Size / 4); - for(i=0; ipui32DefaultValues[i] = pui32DefaultValToken[i]; - } + for (i = 0; i < ui32NumDefaultValues; ++i) + { + psVar->pui32DefaultValues[i] = pui32DefaultValToken[i]; + } } } { psBuffer->ui32TotalSizeInBytes = *pui32Tokens++; - + //skip ui32Flags pui32Tokens++; //skip ui32BufferType @@ -381,8 +387,8 @@ static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, } static void ReadResources(const uint32_t* pui32Tokens,//in - ShaderInfo* psShaderInfo, //out - uint32_t decodeFlags) + ShaderInfo* psShaderInfo, //out + uint32_t decodeFlags) { ResourceBinding* psResBindings; ConstantBuffer* psConstantBuffers; @@ -391,7 +397,7 @@ static void ReadResources(const uint32_t* pui32Tokens,//in const uint32_t* pui32FirstToken = pui32Tokens; uint32_t i; - const uint32_t ui32NumConstantBuffers = *pui32Tokens++; + const uint32_t ui32NumConstantBuffers = *pui32Tokens++; const uint32_t ui32ConstantBufferOffset = *pui32Tokens++; uint32_t ui32NumResourceBindings = *pui32Tokens++; @@ -402,63 +408,63 @@ static void ReadResources(const uint32_t* pui32Tokens,//in //Resources pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset); - psShaderInfo->psResourceBindings.clear(); - 
psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings); - psResBindings = ui32NumResourceBindings == 0 ? NULL : &psShaderInfo->psResourceBindings[0]; + psShaderInfo->psResourceBindings.clear(); + psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings); + psResBindings = ui32NumResourceBindings == 0 ? NULL : &psShaderInfo->psResourceBindings[0]; - for(i=0; i < ui32NumResourceBindings; ++i) + for (i = 0; i < ui32NumResourceBindings; ++i) { - pui32ResourceBindings = ReadResourceBinding(pui32FirstToken, pui32ResourceBindings, psResBindings+i, decodeFlags); - ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS); - } + pui32ResourceBindings = ReadResourceBinding(psShaderInfo, pui32FirstToken, pui32ResourceBindings, psResBindings + i, decodeFlags); + ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS); + } //Constant buffers pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset); - psShaderInfo->psConstantBuffers.clear(); - psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers); - psConstantBuffers = ui32NumConstantBuffers == 0 ? NULL : &psShaderInfo->psConstantBuffers[0]; + psShaderInfo->psConstantBuffers.clear(); + psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers); + psConstantBuffers = ui32NumConstantBuffers == 0 ? 
NULL : &psShaderInfo->psConstantBuffers[0]; - for(i=0; i < ui32NumConstantBuffers; ++i) + for (i = 0; i < ui32NumConstantBuffers; ++i) { - pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers+i); + pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers + i); } - //Map resource bindings to constant buffers - if(psShaderInfo->psConstantBuffers.size()) - { - for(i=0; i < ui32NumResourceBindings; ++i) - { - ResourceGroup eRGroup; - uint32_t cbufIndex = 0; - - eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType); - - //Find the constant buffer whose name matches the resource at the given resource binding point - for(cbufIndex=0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++) - { - if(psConstantBuffers[cbufIndex].name == psResBindings[i].name) - { - psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex; - } - } - } - } + //Map resource bindings to constant buffers + if (psShaderInfo->psConstantBuffers.size()) + { + for (i = 0; i < ui32NumResourceBindings; ++i) + { + ResourceGroup eRGroup; + uint32_t cbufIndex = 0; + + eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType); + + //Find the constant buffer whose name matches the resource at the given resource binding point + for (cbufIndex = 0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++) + { + if (psConstantBuffers[cbufIndex].name == psResBindings[i].name) + { + psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex; + } + } + } + } } static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType) { const uint32_t* pui32Tokens = (const uint32_t*)pui16Tokens; uint32_t ui32NameOffset = *pui32Tokens; - pui16Tokens+= 2; + pui16Tokens += 2; psClassType->ui16ID = *pui16Tokens++; psClassType->ui16ConstBufStride = 
*pui16Tokens++; psClassType->ui16Texture = *pui16Tokens++; psClassType->ui16Sampler = *pui16Tokens++; - psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); + psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); return pui16Tokens; } @@ -474,14 +480,13 @@ static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToke psClassInstance->ui16Texture = *pui16Tokens++; psClassInstance->ui16Sampler = *pui16Tokens++; - psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); + psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); return pui16Tokens; } - static void ReadInterfaces(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo) + ShaderInfo* psShaderInfo) { uint32_t i; uint32_t ui32StartSlot; @@ -503,64 +508,63 @@ static void ReadInterfaces(const uint32_t* pui32Tokens, ClassType* psClassTypes; ClassInstance* psClassInstances; - psShaderInfo->psClassTypes.clear(); - psShaderInfo->psClassTypes.resize(ui32ClassTypeCount); - psClassTypes = &psShaderInfo->psClassTypes[0]; + psShaderInfo->psClassTypes.clear(); + psShaderInfo->psClassTypes.resize(ui32ClassTypeCount); + psClassTypes = &psShaderInfo->psClassTypes[0]; - for(i=0; ipsClassInstances.clear(); - psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount); - psClassInstances = &psShaderInfo->psClassInstances[0]; + psShaderInfo->psClassInstances.clear(); + psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount); + psClassInstances = &psShaderInfo->psClassInstances[0]; - for(i=0; iaui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++; } ui32StartSlot += ui32SlotSpan; } - } void LoadShaderInfo(const uint32_t ui32MajorVersion, const uint32_t ui32MinorVersion, const ReflectionChunks* psChunks, ShaderInfo* psInfo, - 
uint32_t decodeFlags) + uint32_t decodeFlags) { const uint32_t* pui32Inputs = psChunks->pui32Inputs; - const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11; + const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11; const uint32_t* pui32Resources = psChunks->pui32Resources; const uint32_t* pui32Interfaces = psChunks->pui32Interfaces; const uint32_t* pui32Outputs = psChunks->pui32Outputs; - const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11; - const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams; - const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants; - const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11; + const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11; + const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams; + const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants; + const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11; psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; @@ -573,34 +577,33 @@ void LoadShaderInfo(const uint32_t ui32MajorVersion, psInfo->ui32MinorVersion = ui32MinorVersion; - if(pui32Inputs) + if (pui32Inputs) ReadInputSignatures(pui32Inputs, psInfo, 0); - if(pui32Inputs11) + if (pui32Inputs11) ReadInputSignatures(pui32Inputs11, psInfo, 1); - if(pui32Resources) + if (pui32Resources) ReadResources(pui32Resources, psInfo, decodeFlags); - if(pui32Interfaces) + if (pui32Interfaces) ReadInterfaces(pui32Interfaces, psInfo); - if(pui32Outputs) + if (pui32Outputs) ReadOutputSignatures(pui32Outputs, psInfo, 0, 0); - if(pui32Outputs11) + if (pui32Outputs11) ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1); - if(pui32OutputsWithStreams) - ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1); - if(pui32PatchConstants) - ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0); - if (pui32PatchConstants11) - 
ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1); + if (pui32OutputsWithStreams) + ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1); + if (pui32PatchConstants) + ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0); + if (pui32PatchConstants11) + ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1); { uint32_t i; - for(i=0; ipsConstantBuffers.size();++i) + for (i = 0; i < psInfo->psConstantBuffers.size(); ++i) { - if (psInfo->psConstantBuffers[i].name == "$ThisPointer") + if (psInfo->psConstantBuffers[i].name == "$ThisPointer") { psInfo->psThisPointerConstBuffer = &psInfo->psConstantBuffers[i]; } } } } - diff --git a/src/toGLSL.cpp b/src/toGLSL.cpp index 42715c6..41707e3 100644 --- a/src/toGLSL.cpp +++ b/src/toGLSL.cpp @@ -27,307 +27,314 @@ // void ToGLSL::SetIOPrefixes() { - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - psContext->inputPrefix = "in_"; - psContext->outputPrefix = "vs_"; - break; - - case HULL_SHADER: - // Input always coming from vertex shader - psContext->inputPrefix = "vs_"; - psContext->outputPrefix = "hs_"; - break; - - case DOMAIN_SHADER: - // There's no domain shader without hull shader - psContext->inputPrefix = "hs_"; - psContext->outputPrefix = "ds_"; - break; - - case GEOMETRY_SHADER: - // The input depends on whether there's a tessellation shader before us - if (psContext->psDependencies && (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)) - psContext->inputPrefix = "ds_"; - else - psContext->inputPrefix = "vs_"; - - psContext->outputPrefix = "gs_"; - break; + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + psContext->inputPrefix = "in_"; + psContext->outputPrefix = "vs_"; + break; + + case HULL_SHADER: + // Input always coming from vertex shader + psContext->inputPrefix = "vs_"; + psContext->outputPrefix = "hs_"; + break; + + case DOMAIN_SHADER: + // There's no domain shader without hull shader + psContext->inputPrefix = 
"hs_"; + psContext->outputPrefix = "ds_"; + break; + + case GEOMETRY_SHADER: + // The input depends on whether there's a tessellation shader before us + if (psContext->psDependencies && (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)) + psContext->inputPrefix = "ds_"; + else + psContext->inputPrefix = "vs_"; + + psContext->outputPrefix = "gs_"; + break; case PIXEL_SHADER: - // The inputs can come from geom shader, domain shader or directly from vertex shader - if (psContext->psDependencies) - { - if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) - { - psContext->inputPrefix = "gs_"; - } - else if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) - { - psContext->inputPrefix = "ds_"; - } - else - { - psContext->inputPrefix = "vs_"; - } - } - else - { - psContext->inputPrefix = "vs_"; - } - psContext->outputPrefix = ""; - break; - - - case COMPUTE_SHADER: - default: - // No prefixes - psContext->inputPrefix = ""; - psContext->outputPrefix = ""; - break; + // The inputs can come from geom shader, domain shader or directly from vertex shader + if (psContext->psDependencies) + { + if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) + { + psContext->inputPrefix = "gs_"; + } + else if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) + { + psContext->inputPrefix = "ds_"; + } + else + { + psContext->inputPrefix = "vs_"; + } + } + else + { + psContext->inputPrefix = "vs_"; + } + psContext->outputPrefix = ""; + break; + + + case COMPUTE_SHADER: + default: + // No prefixes + psContext->inputPrefix = ""; + psContext->outputPrefix = ""; + break; } } - static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { - bstring glsl = *psContext->currentGLSLString; - bstring extensions = psContext->extensions; - bool isES = (psContext->psShader->eTargetLanguage >= LANG_ES_100 && psContext->psShader->eTargetLanguage <= LANG_ES_310); - bool 
GL_ARB_shader_storage_buffer_object = false; - bool GL_ARB_shader_image_load_store = false; - - if(psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_100 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) - { - psContext->EnableExtension("GL_ARB_shader_bit_encoding"); - } - - if(!HaveCompute(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->eShaderType == COMPUTE_SHADER) - { - psContext->EnableExtension("GL_ARB_compute_shader"); - } - - if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_STRUCTURED] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_RAW]) - { - GL_ARB_shader_storage_buffer_object = true; - } - } - - if (!HaveAtomicMem(psContext->psShader->eTargetLanguage) || - !HaveAtomicCounter(psContext->psShader->eTargetLanguage)) - { - if( psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_ALLOC] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]) - { - psContext->EnableExtension("GL_ARB_shader_atomic_counters"); - } - } - - if (psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_CMP_STORE] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_AND] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_AND] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IADD] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IADD] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_OR] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_XOR] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_UMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMAX] || - 
psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMAX] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_OR] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_XOR] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_EXCH] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CMP_EXCH]) - { - if (!HaveAtomicMem(psContext->psShader->eTargetLanguage)) - GL_ARB_shader_storage_buffer_object = true; - - if (!HaveImageAtomics(psContext->psShader->eTargetLanguage)) - { - if (isES) - psContext->EnableExtension("GL_OES_shader_image_atomic"); - else - GL_ARB_shader_image_load_store = true; - } - } - - if(!HaveGather(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C]) - { - psContext->EnableExtension("GL_ARB_texture_gather"); - } - } - - if(IsESLanguage(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_COARSE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_FINE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_COARSE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_FINE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY]) - { - if (psContext->psShader->eTargetLanguage < LANG_ES_300) - { - psContext->EnableExtension("GL_OES_standard_derivatives"); - } - } - - if (psContext->psShader->eShaderType == PIXEL_SHADER && - (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_L] || - psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_C_LZ] || - psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_D])) - { - psContext->EnableExtension("GL_EXT_shader_texture_lod"); - - static const int 
tex_sampler_type_count = 4; - static const char* tex_sampler_dim_name[tex_sampler_type_count] = { - "1D", "2D", "3D", "Cube", - }; - - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - { - bcatcstr(extensions,"#if !defined(GL_EXT_shader_texture_lod)\n"); - - for (int dim = 0; dim < tex_sampler_type_count; dim++) - { - bformata(extensions, "#define texture%sLodEXT texture%s\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); - - if (dim == 1) // 2D - bformata(extensions, "#define texture%sProjLodEXT texture%sProj\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); - } - bcatcstr(extensions,"#endif\n"); - } - } - } - - if(!HaveGatherNonConstOffset(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO]) - { - psContext->EnableExtension("GL_ARB_gpu_shader5"); - } - } - - if(!HaveQueryLod(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->aiOpcodeUsed[OPCODE_LOD]) - { - psContext->EnableExtension("GL_ARB_texture_query_lod"); - } - } - - if(!HaveQueryLevels(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO]) - { - psContext->EnableExtension("GL_ARB_texture_query_levels"); - psContext->EnableExtension("GL_ARB_shader_image_size"); - } - } - - if (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_INFO ]) - { - psContext->EnableExtension("GL_ARB_shader_texture_image_samples"); - } - - if(!HaveImageLoadStore(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->aiOpcodeUsed[OPCODE_STORE_UAV_TYPED] || - psContext->psShader->aiOpcodeUsed[OPCODE_STORE_RAW] || - psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED]) - { - GL_ARB_shader_image_load_store = true; - psContext->EnableExtension("GL_ARB_shader_bit_encoding"); - } - else - if(psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] || - psContext->psShader->aiOpcodeUsed[OPCODE_LD_RAW] || - 
psContext->psShader->aiOpcodeUsed[OPCODE_LD_STRUCTURED]) - { - GL_ARB_shader_image_load_store = true; - } - } - - if(!HaveGeometryShaderARB(psContext->psShader->eTargetLanguage)) - { - if(psContext->psShader->eShaderType == GEOMETRY_SHADER) - { - psContext->EnableExtension("GL_ARB_geometry_shader"); - } - } - - if(psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) - { - if(psContext->psShader->eShaderType == GEOMETRY_SHADER) - { - psContext->EnableExtension("GL_OES_geometry_shader"); - psContext->EnableExtension("GL_EXT_geometry_shader"); - } - } - - if(psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) - { - if(psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) - { - psContext->EnableExtension("GL_OES_tessellation_shader"); - psContext->EnableExtension("GL_EXT_tessellation_shader"); - } - } - - if (GL_ARB_shader_storage_buffer_object) - psContext->EnableExtension("GL_ARB_shader_storage_buffer_object"); - - if (GL_ARB_shader_image_load_store) - psContext->EnableExtension("GL_ARB_shader_image_load_store"); - - if(psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage)) - { - psContext->RequireExtension("GL_ARB_fragment_coord_conventions"); - } - - if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) - { - psContext->EnableExtension("GL_EXT_shader_framebuffer_fetch"); - } - - //Handle fragment shader default precision - if (psContext->psShader->eShaderType == PIXEL_SHADER && - (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310 || (psContext->flags & 
HLSLCC_FLAG_NVN_TARGET))) - { - if((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) || (psContext->flags & HLSLCC_FLAG_NVN_TARGET)) - bcatcstr(glsl, "precision highp float;\n"); - else if (psContext->psShader->eTargetLanguage == LANG_ES_100) + bstring glsl = *psContext->currentGLSLString; + bstring extensions = psContext->extensions; + bool isES = (psContext->psShader->eTargetLanguage >= LANG_ES_100 && psContext->psShader->eTargetLanguage <= LANG_ES_310); + bool GL_ARB_shader_storage_buffer_object = false; + bool GL_ARB_shader_image_load_store = false; + + if (psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_100 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) + { + psContext->EnableExtension("GL_ARB_shader_bit_encoding"); + } + + if (!HaveCompute(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->eShaderType == COMPUTE_SHADER) + { + psContext->EnableExtension("GL_ARB_compute_shader"); + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_STRUCTURED] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_RAW]) + { + GL_ARB_shader_storage_buffer_object = true; + } + } + + if (!HaveAtomicMem(psContext->psShader->eTargetLanguage) || + !HaveAtomicCounter(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_ALLOC] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]) + { + psContext->EnableExtension("GL_ARB_shader_atomic_counters"); + } + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_CMP_STORE] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_AND] || + 
psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_AND] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IADD] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IADD] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_OR] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_XOR] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_UMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMAX] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMAX] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_OR] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_XOR] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_EXCH] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CMP_EXCH]) + { + if (!HaveAtomicMem(psContext->psShader->eTargetLanguage)) + GL_ARB_shader_storage_buffer_object = true; + + if (!HaveImageAtomics(psContext->psShader->eTargetLanguage)) + { + if (isES) + psContext->EnableExtension("GL_OES_shader_image_atomic"); + else + GL_ARB_shader_image_load_store = true; + } + } + + if (!HaveGather(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C]) + { + psContext->EnableExtension("GL_ARB_texture_gather"); + } + } + + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_COARSE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_FINE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_COARSE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_FINE] || + 
psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY]) + { + if (psContext->psShader->eTargetLanguage < LANG_ES_300) + { + psContext->EnableExtension("GL_OES_standard_derivatives"); + } + } + + if (psContext->psShader->eShaderType == PIXEL_SHADER && + (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_L] || + psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_C_LZ] || + psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_D])) + { + psContext->EnableExtension("GL_EXT_shader_texture_lod"); + + static const int tex_sampler_type_count = 4; + static const char* tex_sampler_dim_name[tex_sampler_type_count] = { + "1D", "2D", "3D", "Cube", + }; + + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + bcatcstr(extensions, "#if !defined(GL_EXT_shader_texture_lod)\n"); + + for (int dim = 0; dim < tex_sampler_type_count; dim++) + { + bformata(extensions, "#define texture%sLodEXT texture%s\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); + + if (dim == 1) // 2D + bformata(extensions, "#define texture%sProjLodEXT texture%sProj\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); + } + bcatcstr(extensions, "#endif\n"); + } + } + } + + if (!HaveGatherNonConstOffset(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO]) + { + psContext->EnableExtension("GL_ARB_gpu_shader5"); + } + } + + if (!HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_LOD]) + { + psContext->EnableExtension("GL_ARB_texture_query_lod"); + } + } + + if (!HaveQueryLevels(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO]) + { + psContext->EnableExtension("GL_ARB_texture_query_levels"); + psContext->EnableExtension("GL_ARB_shader_image_size"); + } + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_INFO]) + { + psContext->EnableExtension("GL_ARB_shader_texture_image_samples"); + } + + 
if (!HaveImageLoadStore(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_STORE_UAV_TYPED] || + psContext->psShader->aiOpcodeUsed[OPCODE_STORE_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED]) + { + GL_ARB_shader_image_load_store = true; + psContext->EnableExtension("GL_ARB_shader_bit_encoding"); + } + else if (psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] || + psContext->psShader->aiOpcodeUsed[OPCODE_LD_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_LD_STRUCTURED]) + { + GL_ARB_shader_image_load_store = true; + } + } + + if (!HaveGeometryShaderARB(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->eShaderType == GEOMETRY_SHADER) + { + psContext->EnableExtension("GL_ARB_geometry_shader"); + } + } + + if (psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) + { + if (psContext->psShader->eShaderType == GEOMETRY_SHADER) + { + psContext->EnableExtension("GL_OES_geometry_shader"); + psContext->EnableExtension("GL_EXT_geometry_shader"); + } + } + + if (psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) + { + if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + psContext->EnableExtension("GL_OES_tessellation_shader"); + psContext->EnableExtension("GL_EXT_tessellation_shader"); + } + } + + if (GL_ARB_shader_storage_buffer_object) + psContext->EnableExtension("GL_ARB_shader_storage_buffer_object"); + + if (GL_ARB_shader_image_load_store) + psContext->EnableExtension("GL_ARB_shader_image_load_store"); + + if (psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage)) + { + psContext->RequireExtension("GL_ARB_fragment_coord_conventions"); + } + + if 
(psContext->psShader->extensions->EXT_shader_framebuffer_fetch && psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) + { + psContext->EnableExtension("GL_EXT_shader_framebuffer_fetch"); + } + + //Handle fragment shader default precision + if (psContext->psShader->eShaderType == PIXEL_SHADER && + (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET))) + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { // gles 2.0 shaders can have mediump as default if the GPU doesn't have highp support - bcatcstr(glsl, "#ifdef GL_FRAGMENT_PRECISION_HIGH\nprecision highp float;\n#else\nprecision mediump float;\n#endif\n"); - - // Define default int precision to highp to avoid issues on platforms that actually implement mediump - bcatcstr(glsl, "precision highp int;\n"); - } + bcatcstr(glsl, + "#ifdef GL_FRAGMENT_PRECISION_HIGH\n" + " precision highp float;\n" + "#else\n" + " precision mediump float;\n" + "#endif\n"); + } + else + { + bcatcstr(glsl, "precision highp float;\n"); + } + + // Define default int precision to highp to avoid issues on platforms that actually implement mediump + bcatcstr(glsl, "precision highp int;\n"); + } - if(psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_150) - { - if(psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT) - bcatcstr(glsl,"layout(origin_upper_left) in vec4 gl_FragCoord;\n"); + if (psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_150) + { + if (psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT) + bcatcstr(glsl, "layout(origin_upper_left) in vec4 gl_FragCoord;\n"); - if(psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER) - bcatcstr(glsl,"layout(pixel_center_integer) in vec4 gl_FragCoord;\n"); - } + if (psContext->flags & 
HLSLCC_FLAG_PIXEL_CENTER_INTEGER) + bcatcstr(glsl, "layout(pixel_center_integer) in vec4 gl_FragCoord;\n"); + } /* @@ -335,8 +342,8 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) To use any built-in input or output in the gl_PerVertex block in separable program objects, shader code must redeclare that block prior to use. */ - /* DISABLED FOR NOW */ -/* if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410) + /* DISABLED FOR NOW */ +/* if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410) { bcatcstr(glsl, "out gl_PerVertex {\n"); bcatcstr(glsl, "vec4 gl_Position;\n"); @@ -349,7 +356,7 @@ static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) GLLang ChooseLanguage(Shader* psShader) { // Depends on the HLSL shader model extracted from bytecode. - switch(psShader->ui32MajorVersion) + switch (psShader->ui32MajorVersion) { case 5: { @@ -368,7 +375,7 @@ GLLang ChooseLanguage(Shader* psShader) const char* GetVersionString(GLLang language) { - switch(language) + switch (language) { case LANG_ES_100: { @@ -445,104 +452,114 @@ const char* GetVersionString(GLLang language) static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) { - switch (eType) - { - default: - case MAIN_PHASE: return ""; - case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; - case HS_FORK_PHASE: return "fork_phase"; - case HS_CTRL_POINT_PHASE: return "control_point_phase"; - case HS_JOIN_PHASE: return "join_phase"; - } + switch (eType) + { + default: + case MAIN_PHASE: return ""; + case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; + case HS_FORK_PHASE: return "fork_phase"; + case HS_CTRL_POINT_PHASE: return "control_point_phase"; + case HS_JOIN_PHASE: return "join_phase"; + } } static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) { - uint32_t i; - bstring glsl = psContext->glsl; - - for (i = 0; i < 
psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - const char *Type; - uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - switch (psSig->eComponentType) - { - default: - case INOUT_COMPONENT_FLOAT32: - Type = ui32NumComponents > 1 ? "vec" : "float"; - break; - case INOUT_COMPONENT_SINT32: - Type = ui32NumComponents > 1 ? "ivec" : "int"; - break; - case INOUT_COMPONENT_UINT32: - Type = ui32NumComponents > 1 ? "uvec" : "uint"; - break; - } - if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) - continue; - - std::string inputName; - - { - std::ostringstream oss; - oss << psContext->inputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; - inputName = oss.str(); - } - - std::string outputName; - { - std::ostringstream oss; - oss << psContext->outputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; - outputName = oss.str(); - } - - const char * prec = HavePrecisionQualifiers(psContext) ? 
"highp ": ""; - - psContext->AddIndentation(); - if (ui32NumComponents > 1) // TODO Precision - bformata(glsl, "in %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - else - bformata(glsl, "in %s%s %s%s%d[];\n", prec, Type, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - - psContext->AddIndentation(); - if (ui32NumComponents > 1) // TODO Precision - bformata(glsl, "out %s%s%d %s%s%d[];\n", prec, Type, ui32NumComponents, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - else - bformata(glsl, "out %s%s %s%s%d[];\n", prec, Type, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - } - - psContext->AddIndentation(); - bcatcstr(glsl, "void passthrough_ctrl_points()\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - psContext->indent++; - - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - - psContext->AddIndentation(); - - if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) - bformata(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"); - else - bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - } - - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); + uint32_t i; + bstring glsl = psContext->glsl; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + const char *Type; + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + 
switch (psSig->eComponentType) + { + default: + case INOUT_COMPONENT_FLOAT32: + Type = ui32NumComponents > 1 ? "vec" : "float"; + break; + case INOUT_COMPONENT_SINT32: + Type = ui32NumComponents > 1 ? "ivec" : "int"; + break; + case INOUT_COMPONENT_UINT32: + Type = ui32NumComponents > 1 ? "uvec" : "uint"; + break; + } + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + continue; + + std::string inputName; + + { + std::ostringstream oss; + oss << psContext->inputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; + inputName = oss.str(); + } + + std::string outputName; + { + std::ostringstream oss; + oss << psContext->outputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; + outputName = oss.str(); + } + + const char * prec = ""; + if (HavePrecisionQualifiers(psContext)) + { + if (psSig->eMinPrec != MIN_PRECISION_DEFAULT) + prec = "mediump "; + else + prec = "highp "; + } + + int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true); + int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false); + + psContext->AddIndentation(); + if (ui32NumComponents > 1) + bformata(glsl, "layout(location = %d) in %s%s%d %s%s%d[];\n", inLoc, prec, Type, ui32NumComponents, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + else + bformata(glsl, "layout(location = %d) in %s%s %s%s%d[];\n", inLoc, prec, Type, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + + psContext->AddIndentation(); + if (ui32NumComponents > 1) + bformata(glsl, "layout(location = %d) out %s%s%d %s%s%d[];\n", outLoc, prec, Type, ui32NumComponents, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + else + bformata(glsl, "layout(location = %d) out %s%s %s%s%d[];\n", outLoc, prec, Type, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } + + 
psContext->AddIndentation(); + bcatcstr(glsl, "void passthrough_ctrl_points()\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + + psContext->AddIndentation(); + + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + bformata(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"); + else + bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } + + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); } GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage) { - language = suggestedLanguage; - if (language == LANG_DEFAULT) - { - language = ChooseLanguage(psContext->psShader); - } - return language; + language = suggestedLanguage; + if (language == LANG_DEFAULT) + { + language = ChooseLanguage(psContext->psShader); + } + return language; } bool ToGLSL::Translate() @@ -550,263 +567,280 @@ bool ToGLSL::Translate() bstring glsl; uint32_t i; Shader* psShader = psContext->psShader; - uint32_t ui32Phase; + uint32_t ui32Phase; - psContext->psTranslator = this; + psContext->psTranslator = this; - if (language == LANG_DEFAULT) - SetLanguage(LANG_DEFAULT); + if (language == LANG_DEFAULT) + SetLanguage(LANG_DEFAULT); - SetIOPrefixes(); - psShader->ExpandSWAPCs(); - psShader->ForcePositionToHighp(); - psShader->AnalyzeIOOverlap(); - psShader->FindUnusedGlobals(psContext->flags); + SetIOPrefixes(); + psShader->ExpandSWAPCs(); + psShader->ForcePositionToHighp(); + psShader->AnalyzeIOOverlap(); + psShader->FindUnusedGlobals(psContext->flags); psContext->indent = 0; - glsl = bfromcstralloc 
(1024 * 10, "\n"); - bstring extensions = bfromcstralloc (1024 * 10, GetVersionString(language)); + glsl = bfromcstralloc(1024 * 10, "\n"); + bstring extensions = bfromcstralloc(1024 * 10, GetVersionString(language)); psContext->extensions = extensions; psContext->glsl = glsl; - for(i=0; iasPhases.size();++i) + for (i = 0; i < psShader->asPhases.size(); ++i) { - psShader->asPhases[i].postShaderCode = bfromcstralloc (1024 * 5, ""); - psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); - } + psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); + psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); + } psContext->currentGLSLString = &glsl; psShader->eTargetLanguage = language; psContext->currentPhase = MAIN_PHASE; - if (psShader->extensions) - { - if(psContext->flags & HLSLCC_FLAG_NVN_TARGET) - { - psContext->EnableExtension("GL_ARB_separate_shader_objects"); - psContext->EnableExtension("GL_NV_desktop_lowp_mediump"); // This flag allow FP16 operations (mediump in GLSL) - } - if (psShader->extensions->ARB_explicit_attrib_location) - psContext->RequireExtension("GL_ARB_explicit_attrib_location"); - if (psShader->extensions->ARB_explicit_uniform_location) - psContext->RequireExtension("GL_ARB_explicit_uniform_location"); - if (psShader->extensions->ARB_shading_language_420pack) - psContext->RequireExtension("GL_ARB_shading_language_420pack"); - } + if (psShader->extensions) + { + if (psContext->flags & HLSLCC_FLAG_NVN_TARGET) + { + psContext->EnableExtension("GL_ARB_separate_shader_objects"); + psContext->EnableExtension("GL_NV_desktop_lowp_mediump"); // This flag allow FP16 operations (mediump in GLSL) + } + if (psShader->extensions->ARB_explicit_attrib_location) + psContext->RequireExtension("GL_ARB_explicit_attrib_location"); + if (psShader->extensions->ARB_explicit_uniform_location) + psContext->RequireExtension("GL_ARB_explicit_uniform_location"); + if (psShader->extensions->ARB_shading_language_420pack) + 
psContext->RequireExtension("GL_ARB_shading_language_420pack"); + } psContext->ClearDependencyData(); AddVersionDependentCode(psContext); - if(psShader->eShaderType == VERTEX_SHADER && + if (psShader->eShaderType == VERTEX_SHADER && HaveLimitedInOutLocationQualifier(language, psShader->extensions) && psContext->flags & HLSLCC_FLAG_NVN_TARGET) { bcatcstr(glsl, "out gl_PerVertex { vec4 gl_Position; };\n"); } - if (!psContext->psDependencies->m_ExtBlendModes.empty() && psShader->eShaderType == PIXEL_SHADER) - { - psContext->EnableExtension("GL_KHR_blend_equation_advanced"); - bcatcstr(glsl, "#if GL_KHR_blend_equation_advanced\n"); - for (i = 0; i < psContext->psDependencies->m_ExtBlendModes.size(); i++) - { - bformata(glsl, "layout(%s) out;\n", psContext->psDependencies->m_ExtBlendModes[i].c_str()); - } - bcatcstr(glsl, "#endif\n"); - } - - - psShader->PrepareStructuredBufferBindingSlots(); - - for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase &phase = psShader->asPhases[ui32Phase]; - phase.UnvectorizeImmMoves(); - psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(); - psShader->ResolveStructuredBufferBindingSlots(&phase); - if(!psContext->IsVulkan()) - phase.PruneConstArrays(); - } - - psShader->PruneTempRegisters(); - - for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - // Loop transform can only be done after the temps have been pruned - ShaderPhase &phase = psShader->asPhases[ui32Phase]; - HLSLcc::DoLoopTransform(psContext, phase); - - if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) - { - IdentifyStaticBranches(&phase); - } - - } - - //Special case. Can have multiple phases. 
- if(psShader->eShaderType == HULL_SHADER) + if (!psContext->psDependencies->m_ExtBlendModes.empty() && psShader->eShaderType == PIXEL_SHADER) { - const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; - uint32_t ui32PhaseCallIndex; - int perPatchSectionAdded = 0; - int hasControlPointPhase = 0; + psContext->EnableExtension("GL_KHR_blend_equation_advanced"); + bcatcstr(glsl, "#if GL_KHR_blend_equation_advanced\n"); + for (i = 0; i < psContext->psDependencies->m_ExtBlendModes.size(); i++) + { + bformata(glsl, "layout(%s) out;\n", psContext->psDependencies->m_ExtBlendModes[i].c_str()); + } + bcatcstr(glsl, "#endif\n"); + } + + + psShader->PrepareStructuredBufferBindingSlots(); - psShader->ConsolidateHullTempVars(); + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + phase.UnvectorizeImmMoves(); + psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(); + psShader->ResolveStructuredBufferBindingSlots(&phase); + if (!psContext->IsVulkan() && !psContext->IsSwitch()) + phase.PruneConstArrays(); + } - // Find out if we have a passthrough hull shader - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - hasControlPointPhase = 1; - } + psShader->PruneTempRegisters(); - // Phase 1 is always the global decls phase, no instructions - for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i) + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + // Loop transform can only be done after the temps have been pruned + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + HLSLcc::DoLoopTransform(psContext, phase); + + if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) { - TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); + IdentifyStaticBranches(&phase); } + } - if (hasControlPointPhase == 0) - { - 
DoHullShaderPassthrough(psContext); - } + //Special case. Can have multiple phases. + if (psShader->eShaderType == HULL_SHADER) + { + const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; + uint32_t ui32PhaseCallIndex; + int perPatchSectionAdded = 0; + int hasControlPointPhase = 0; + + psShader->ConsolidateHullTempVars(); + + // Find out if we have a passthrough hull shader + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + hasControlPointPhase = 1; + } - for(ui32Phase=2; ui32PhaseasPhases.size(); ui32Phase++) - { - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - psContext->currentPhase = ui32Phase; + // Phase 1 is always the global decls phase, no instructions + for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); + } + + if (hasControlPointPhase == 0) + { + DoHullShaderPassthrough(psContext); + } + + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + psContext->currentPhase = ui32Phase; #ifdef _DEBUG - bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); + bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); #endif - for (i = 0; i < psPhase->psDecl.size(); ++i) - { - TranslateDeclaration(&psPhase->psDecl[i]); - } + for (i = 0; i < psPhase->psDecl.size(); ++i) + { + TranslateDeclaration(&psPhase->psDecl[i]); + } - if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) - { - DeclareSpecializationConstants(*psPhase); - } + if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) + { + DeclareSpecializationConstants(*psPhase); + } - bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase); - psContext->indent++; + bformata(glsl, "void %s%d(int 
phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase); + psContext->indent++; - if (psPhase->psInst.size() > 0) - { - //The minus one here is remove the return statement at end of phases. - //We don't want to translate that, we'll just end the function body. - ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); - for (i = 0; i < psPhase->psInst.size() - 1; ++i) - { - TranslateInstruction(&psPhase->psInst[i]); - } - } + if (psPhase->psInst.size() > 0) + { + //The minus one here is remove the return statement at end of phases. + //We don't want to translate that, we'll just end the function body. + ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); + for (i = 0; i < psPhase->psInst.size() - 1; ++i) + { + TranslateInstruction(&psPhase->psInst[i]); + } + } - psContext->indent--; - bcatcstr(glsl, "}\n"); - } + psContext->indent--; + bcatcstr(glsl, "}\n"); + } bcatcstr(glsl, "void main()\n{\n"); psContext->indent++; - // There are cases when there are no control point phases and we have to do passthrough - if (hasControlPointPhase == 0) - { - // Passthrough control point phase, run the rest only once per patch - psContext->AddIndentation(); - bcatcstr(glsl, "passthrough_ctrl_points();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "barrier();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - psContext->indent++; - perPatchSectionAdded = 1; - } - - for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) - { - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - uint32_t i; - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) - continue; - - if (psPhase->earlyMain->slen > 1) - { + // There are cases when there are no control point phases and we have to do passthrough + if (hasControlPointPhase == 0) + { + 
// Passthrough control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(glsl, "passthrough_ctrl_points();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + perPatchSectionAdded = 1; + } + + for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + uint32_t i; + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + + if (psPhase->earlyMain->slen > 1) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Start Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); #endif - bconcat(glsl, psPhase->earlyMain); + bconcat(glsl, psPhase->earlyMain); #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); #endif - } - - for (i = 0; i < psPhase->ui32InstanceCount; i++) - { + } - psContext->AddIndentation(); - bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i); - } + for (i = 0; i < psPhase->ui32InstanceCount; i++) + { + psContext->AddIndentation(); + bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i); + } - if (psPhase->hasPostShaderCode) - { + if (psPhase->hasPostShaderCode) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); #endif - bconcat(glsl, psPhase->postShaderCode); + bconcat(glsl, psPhase->postShaderCode); #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, 
"//--- End post shader code ---\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); #endif - } - - - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - { - // We're done printing control point phase, run the rest only once per patch - psContext->AddIndentation(); - bcatcstr(glsl, "barrier();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - psContext->indent++; - perPatchSectionAdded = 1; - } - } - } - - if (perPatchSectionAdded != 0) - { - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - - psContext->indent--; + } + + + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + { + // We're done printing control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + perPatchSectionAdded = 1; + } + } + } + + if (perPatchSectionAdded != 0) + { + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + + psContext->indent--; bcatcstr(glsl, "}\n"); + // Print out extra functions we generated, in reverse order for potential dependencies + std::for_each(m_FunctionDefinitions.rbegin(), m_FunctionDefinitions.rend(), [&extensions](const FunctionDefinitions::value_type &p) + { + bcatcstr(extensions, p.second.c_str()); + bcatcstr(extensions, "\n"); + }); + // Concat extensions and glsl for the final shader code. 
+ if (m_NeedUnityInstancingArraySizeDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO); + } + else + { + bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); + } + } + bconcat(extensions, glsl); bdestroy(glsl); psContext->glsl = extensions; glsl = NULL; - if(psContext->psDependencies) + if (psContext->psDependencies) { //Save partitioning and primitive type for use by domain shader. psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; @@ -815,19 +849,19 @@ bool ToGLSL::Translate() } return true; - } + } - if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) + if (psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) { //Load partitioning and primitive type from hull shader. - switch(psContext->psDependencies->eTessOutPrim) - { - case TESSELLATOR_OUTPUT_TRIANGLE_CCW: - { - bcatcstr(glsl, "layout(ccw) in;\n"); - break; - } - case TESSELLATOR_OUTPUT_TRIANGLE_CW: + switch (psContext->psDependencies->eTessOutPrim) + { + case TESSELLATOR_OUTPUT_TRIANGLE_CCW: + { + bcatcstr(glsl, "layout(ccw) in;\n"); + break; + } + case TESSELLATOR_OUTPUT_TRIANGLE_CW: { bcatcstr(glsl, "layout(cw) in;\n"); break; @@ -843,7 +877,7 @@ bool ToGLSL::Translate() } } - switch(psContext->psDependencies->eTessPartitioning) + switch (psContext->psDependencies->eTessPartitioning) { case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: { @@ -862,6 +896,7 @@ bool ToGLSL::Translate() } } + bstring generatedFunctionsKeyword = bfromcstr("\n// Generated functions\n\n"); bstring beforeMain = NULL; bstring beforeMainKeyword = NULL; @@ -872,15 +907,18 @@ bool ToGLSL::Translate() psContext->beforeMain = beforeMain; } - for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) - { - 
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); - } + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + } - if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) - { - DeclareSpecializationConstants(psShader->asPhases[0]); - } + if ((psContext->flags & HLSLCC_FLAG_VULKAN_SPECIALIZATION_CONSTANTS) != 0) + { + DeclareSpecializationConstants(psShader->asPhases[0]); + } + + // Search and replace string, for injecting generated functions that need to be after default precision declarations + bconcat(glsl, generatedFunctionsKeyword); // Search and replace string, for injecting stuff from translation that need to be after normal declarations and before main if (!HaveDynamicIndexing(psContext)) @@ -892,34 +930,41 @@ bool ToGLSL::Translate() psContext->indent++; - if (psContext->psShader->asPhases[0].earlyMain->slen > 1) - { + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Start Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); #endif - bconcat(glsl, psContext->psShader->asPhases[0].earlyMain); + bconcat(glsl, psContext->psShader->asPhases[0].earlyMain); #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); #endif - } + } - for(i=0; i < psShader->asPhases[0].psInst.size(); ++i) + for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) { - TranslateInstruction(&psShader->asPhases[0].psInst[i]); + TranslateInstruction(&psShader->asPhases[0].psInst[i]); } psContext->indent--; bcatcstr(glsl, "}\n"); - // Print out extra functions we generated, in reverse order for potential dependencies - std::for_each(m_FunctionDefinitions.rbegin(), m_FunctionDefinitions.rend(), [&extensions](const FunctionDefinitions::value_type &p) + // Print 
out extra functions we generated in generation order to satisfy dependencies { - bcatcstr(extensions, p.second.c_str()); - bcatcstr(extensions, "\n"); - }); + bstring generatedFunctions = bfromcstr(""); + for (std::vector::const_iterator funcNameIter = m_FunctionDefinitionsOrder.begin(); funcNameIter != m_FunctionDefinitionsOrder.end(); ++funcNameIter) + { + const FunctionDefinitions::const_iterator definition = m_FunctionDefinitions.find(*funcNameIter); + ASSERT(definition != m_FunctionDefinitions.end()); + bcatcstr(generatedFunctions, definition->second.c_str()); + bcatcstr(generatedFunctions, "\n"); + } + bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctions, 0); + bdestroy(generatedFunctions); + } // Concat extensions and glsl for the final shader code. if (m_NeedUnityInstancingArraySizeDecl) @@ -960,331 +1005,339 @@ bool ToGLSL::Translate() bool ToGLSL::DeclareExtraFunction(const std::string &name, bstring body) { - if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) - return true; - m_FunctionDefinitions.insert(std::make_pair(name, (const char *) body->data)); - return false; + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return true; + m_FunctionDefinitions.insert(std::make_pair(name, (const char *)body->data)); + m_FunctionDefinitionsOrder.push_back(name); + return false; } static void PrintComponentWrapper1(bstring code, const char *func, const char *type2, const char *type3, const char *type4) { - bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); return a; }\n", type2, func, type2, func, func); - bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); return a; }\n", type3, func, type3, func, func, func); - bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); a.w = %s(a.w); return a; }\n", type4, func, type4, func, func, func, func); + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); return a; }\n", type2, func, type2, func, func); + 
bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); return a; }\n", type3, func, type3, func, func, func); + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); a.w = %s(a.w); return a; }\n", type4, func, type4, func, func, func, func); } static void PrintComponentWrapper2(bstring code, const char *func, const char *type2, const char *type3, const char *type4) { - bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); return a; }\n", type2, func, type2, type2, func, func); - bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); return a; }\n", type3, func, type3, type3, func, func, func); - bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); a.w = %s(a.w, b.w); return a; }\n", type4, func, type4, type4, func, func, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); return a; }\n", type2, func, type2, type2, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); return a; }\n", type3, func, type3, type3, func, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); a.w = %s(a.w, b.w); return a; }\n", type4, func, type4, type4, func, func, func, func); } static void PrintTrunc(bstring code, const char *type) { - bformata(code, "%s trunc(%s x) { return sign(x)*floor(abs(x)); }\n", type, type); + bformata(code, "%s trunc(%s x) { return sign(x)*floor(abs(x)); }\n", type, type); } void ToGLSL::UseExtraFunctionDependency(const std::string &name) { - if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) - return; - - bstring code = bfromcstr(""); - bool match = true; - - if (name == "trunc") - { - PrintTrunc(code, "float"); - PrintTrunc(code, "vec2"); - PrintTrunc(code, "vec3"); - PrintTrunc(code, "vec4"); - } - else if (name == "roundEven") - { - 
bformata(code, "float roundEven(float x) { float y = floor(x + 0.5); return (y - x == 0.5) ? floor(0.5*y) * 2.0 : y; }\n"); - PrintComponentWrapper1(code, "roundEven", "vec2", "vec3", "vec4"); - } - else if (name == "op_modi") - { - bformata(code, "const int BITWISE_BIT_COUNT = 32;\nint op_modi(int x, int y) { return x - y * (x / y); }\n"); - PrintComponentWrapper2(code, "op_modi", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_and") - { - UseExtraFunctionDependency("op_modi"); - - bformata(code, "int op_and(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) == 1) && (op_modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 && b > 0)) { break; } } return result; }\n"); - PrintComponentWrapper2(code, "op_and", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_or") - { - UseExtraFunctionDependency("op_modi"); - - bformata(code, "int op_or(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) == 1) || (op_modi(b, 2) == 1)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 || b > 0)) { break; } } return result; }\n"); - PrintComponentWrapper2(code, "op_or", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_xor") - { - UseExtraFunctionDependency("op_and"); - - bformata(code, "int op_xor(int a, int b) { return (a + b - 2 * op_and(a, b)); }\n"); - PrintComponentWrapper2(code, "op_xor", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_shr") - { - bformata(code, "int op_shr(int a, int b) { return int(floor(float(a) / pow(2.0, float(b)))); }\n"); - PrintComponentWrapper2(code, "op_shr", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_shl") - { - bformata(code, "int op_shl(int a, int b) { return int(floor(float(a) * pow(2.0, float(b)))); }\n"); - PrintComponentWrapper2(code, "op_shl", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_not") - { - bformata(code, "int op_not(int value) { return 
-value - 1; }\n"); - PrintComponentWrapper1(code, "op_not", "ivec2", "ivec3", "ivec4"); - } - else - { - match = false; - } - - if (match) - DeclareExtraFunction(name, code); - - bdestroy(code); + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return; + + bstring code = bfromcstr(""); + bool match = true; + + if (name == "trunc") + { + PrintTrunc(code, "float"); + PrintTrunc(code, "vec2"); + PrintTrunc(code, "vec3"); + PrintTrunc(code, "vec4"); + } + else if (name == "roundEven") + { + bformata(code, "float roundEven(float x) { float y = floor(x + 0.5); return (y - x == 0.5) ? floor(0.5*y) * 2.0 : y; }\n"); + PrintComponentWrapper1(code, "roundEven", "vec2", "vec3", "vec4"); + } + else if (name == "op_modi") + { + bformata(code, "const int BITWISE_BIT_COUNT = 32;\nint op_modi(int x, int y) { return x - y * (x / y); }\n"); + PrintComponentWrapper2(code, "op_modi", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_and") + { + UseExtraFunctionDependency("op_modi"); + + bformata(code, "int op_and(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) != 0) && (op_modi(b, 2) != 0)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 && b > 0)) { break; } } return result; }\n"); + PrintComponentWrapper2(code, "op_and", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_or") + { + UseExtraFunctionDependency("op_modi"); + + bformata(code, "int op_or(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) != 0) || (op_modi(b, 2) != 0)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 || b > 0)) { break; } } return result; }\n"); + PrintComponentWrapper2(code, "op_or", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_xor") + { + UseExtraFunctionDependency("op_and"); + + bformata(code, "int op_xor(int a, int b) { return (a + b - 2 * op_and(a, b)); }\n"); + PrintComponentWrapper2(code, "op_xor", "ivec2", "ivec3", 
"ivec4"); + } + else if (name == "op_shr") + { + bformata(code, "int op_shr(int a, int b) { return int(floor(float(a) / pow(2.0, float(b)))); }\n"); + PrintComponentWrapper2(code, "op_shr", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_shl") + { + bformata(code, "int op_shl(int a, int b) { return int(floor(float(a) * pow(2.0, float(b)))); }\n"); + PrintComponentWrapper2(code, "op_shl", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_not") + { + bformata(code, "int op_not(int value) { return -value - 1; }\n"); + PrintComponentWrapper1(code, "op_not", "ivec2", "ivec3", "ivec4"); + } + else if (name == "int_bitfieldInsert") + { + // Can't use the name 'bitfieldInsert' because Adreno fails with "can't redefine/overload built-in functions!" + bcatcstr(code, + "int int_bitfieldInsert(int base, int insert, int offset, int bits) {\n" + " uint mask = ~(uint(0xffffffff) << uint(bits)) << uint(offset);\n" + " return int((uint(base) & ~mask) | ((uint(insert) << uint(offset)) & mask));\n" + "}\n"); + } + else + { + match = false; + } + + if (match) + DeclareExtraFunction(name, code); + + bdestroy(code); } void ToGLSL::DeclareSpecializationConstants(ShaderPhase &phase) { - bstring glsl = psContext->glsl; - // There may be several uses for the same branch condition, so we'll need to keep track of what we've already declared. - std::set alreadyDeclared; - for (std::vector::iterator itr = phase.m_StaticBranchInstructions.begin(); itr != phase.m_StaticBranchInstructions.end(); itr++) - { - Instruction &i = **itr; - uint32_t slot = psContext->psDependencies->GetSpecializationConstantSlot(i.m_StaticBranchName); - if(alreadyDeclared.insert(slot).second) // Only declare if the insertion actually succeeded - bformata(glsl, "layout(constant_id = %d) const bool %s = false;\n", slot, i.m_StaticBranchName.c_str()); - } + bstring glsl = psContext->glsl; + // There may be several uses for the same branch condition, so we'll need to keep track of what we've already declared. 
+ std::set alreadyDeclared; + for (std::vector::iterator itr = phase.m_StaticBranchInstructions.begin(); itr != phase.m_StaticBranchInstructions.end(); itr++) + { + Instruction &i = **itr; + uint32_t slot = psContext->psDependencies->GetSpecializationConstantSlot(i.m_StaticBranchName); + if (alreadyDeclared.insert(slot).second) // Only declare if the insertion actually succeeded + bformata(glsl, "layout(constant_id = %d) const bool %s = false;\n", slot, i.m_StaticBranchName.c_str()); + } } std::string to64(uint32_t in) { - const char to64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - char c_[2]; - c_[0] = to64[in]; - c_[1] = 0; - char c = c_[0]; - if (c == 'X') - return "XX"; - if (c == '+') - return "XA"; - if (c == '/') - return "XB"; - return std::string(c_); + const char to64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + char c_[2]; + c_[0] = to64[in]; + c_[1] = 0; + char c = c_[0]; + if (c == 'X') + return "XX"; + if (c == '+') + return "XA"; + if (c == '/') + return "XB"; + return std::string(c_); } // Slightly custom base64, espace non-identifier chars with 'X' static void Base64Encode(const std::string &in, std::string& result) { + size_t len = in.length(); + size_t outputLen = (len + 2) / 3 * 4; + unsigned char *bytes = (unsigned char *)&in[0]; - size_t len = in.length(); - size_t outputLen = (len + 2) / 3 * 4; - unsigned char *bytes = (unsigned char *)&in[0]; - - result.clear(); - result.reserve(outputLen); - - int i = 0; - unsigned char b1, b2, b3; - for (int chunk = 0; len > 0; len -= 3, chunk++) { - b1 = bytes[i++]; - b2 = len > 1 ? 
bytes[i++] : '\0'; - result += to64(b1 >> 2); - result += to64(((b1 & 3) << 4) | ((b2 & 0xf0) >> 4)); - if (len > 2) - { - b3 = bytes[i++]; - result += to64(((b2 & 0xF) << 2) | ((b3 & 0xC0) >> 6)); - result += to64(b3 & 0x3F); - } - else if (len == 2) - { - result += to64((b2 & 0xF) << 2); - result += "XC"; - break; - } - else /* len == 1 */ - { - result += "XC"; - break; - } - } -} + result.clear(); + result.reserve(outputLen); + int i = 0; + unsigned char b1, b2, b3; + for (int chunk = 0; len > 0; len -= 3, chunk++) + { + b1 = bytes[i++]; + b2 = len > 1 ? bytes[i++] : '\0'; + result += to64(b1 >> 2); + result += to64(((b1 & 3) << 4) | ((b2 & 0xf0) >> 4)); + if (len > 2) + { + b3 = bytes[i++]; + result += to64(((b2 & 0xF) << 2) | ((b3 & 0xC0) >> 6)); + result += to64(b3 & 0x3F); + } + else if (len == 2) + { + result += to64((b2 & 0xF) << 2); + result += "XC"; + break; + } + else /* len == 1 */ + { + result += "XC"; + break; + } + } +} bool ToGLSL::BuildStaticBranchNameForInstruction(Instruction &inst) { - std::ostringstream oss; - if (!inst.m_StaticBranchCondition) - { - // Simple case, just get the value, check if nonzero - bstring varname = bfromcstr(""); - SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext); - uint32_t flag = TO_FLAG_NONE; - switch (argType) - { - case SVT_BOOL: - flag = TO_FLAG_BOOL; - break; - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - flag = TO_FLAG_INTEGER; - break; - case SVT_UINT: - case SVT_UINT16: - case SVT_UINT8: - flag = TO_FLAG_UNSIGNED_INTEGER; - break; - default: - break; - } - TranslateOperand(varname, &inst.asOperands[0], flag); - char *str = bstr2cstr(varname, '\0'); - oss << str; - bcstrfree(str); - bdestroy(varname); - oss << "!=0"; - std::string res = oss.str(); - // Sanity checks: no arrays, no matrices - if (res.find('[') != std::string::npos) - return false; - if (res.find("hlslcc_mtx") != std::string::npos) - return false; - Base64Encode(res, inst.m_StaticBranchName); - } - else - { - // 
Indirect, just store the whole previous instruction and then the condition - bstring res = bfromcstr(""); - - bstring *oldglsl = psContext->currentGLSLString; - psContext->currentGLSLString = &res; - TranslateInstruction((Instruction *)inst.m_StaticBranchCondition, true); - psContext->currentGLSLString = oldglsl; - - SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext); - uint32_t flag = TO_FLAG_NONE; - switch (argType) - { - case SVT_BOOL: - flag = TO_FLAG_BOOL; - break; - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - flag = TO_FLAG_INTEGER; - break; - case SVT_UINT: - case SVT_UINT16: - case SVT_UINT8: - flag = TO_FLAG_UNSIGNED_INTEGER; - break; - default: - break; - } - - if (argType == SVT_BOOL) - { - if (inst.eBooleanTestType == INSTRUCTION_TEST_ZERO) - bcatcstr(res, "!"); - } - - TranslateOperand(res, &inst.asOperands[0], flag); - char *str = bstr2cstr(res, '\0'); - oss << str; - bcstrfree(str); - bdestroy(res); - if(argType != SVT_BOOL) - oss << "!=0"; - - std::string ress = oss.str(); - // Sanity checks: no arrays, no matrices - if (ress.find('[') != std::string::npos) - return false; - if (ress.find("hlslcc_mtx") != std::string::npos) - return false; - Base64Encode(ress, inst.m_StaticBranchName); - } - return true; + std::ostringstream oss; + if (!inst.m_StaticBranchCondition) + { + // Simple case, just get the value, check if nonzero + bstring varname = bfromcstr(""); + SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext); + uint32_t flag = TO_FLAG_NONE; + switch (argType) + { + case SVT_BOOL: + flag = TO_FLAG_BOOL; + break; + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + flag = TO_FLAG_INTEGER; + break; + case SVT_UINT: + case SVT_UINT16: + case SVT_UINT8: + flag = TO_FLAG_UNSIGNED_INTEGER; + break; + default: + break; + } + TranslateOperand(varname, &inst.asOperands[0], flag); + char *str = bstr2cstr(varname, '\0'); + oss << str; + bcstrfree(str); + bdestroy(varname); + oss << "!=0"; + std::string res 
= oss.str(); + // Sanity checks: no arrays, no matrices + if (res.find('[') != std::string::npos) + return false; + if (res.find("hlslcc_mtx") != std::string::npos) + return false; + Base64Encode(res, inst.m_StaticBranchName); + } + else + { + // Indirect, just store the whole previous instruction and then the condition + bstring res = bfromcstr(""); + bstring *oldglsl = psContext->currentGLSLString; + psContext->currentGLSLString = &res; + TranslateInstruction((Instruction *)inst.m_StaticBranchCondition, true); + psContext->currentGLSLString = oldglsl; + + SHADER_VARIABLE_TYPE argType = inst.asOperands[0].GetDataType(psContext); + uint32_t flag = TO_FLAG_NONE; + switch (argType) + { + case SVT_BOOL: + flag = TO_FLAG_BOOL; + break; + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + flag = TO_FLAG_INTEGER; + break; + case SVT_UINT: + case SVT_UINT16: + case SVT_UINT8: + flag = TO_FLAG_UNSIGNED_INTEGER; + break; + default: + break; + } + + if (argType == SVT_BOOL) + { + if (inst.eBooleanTestType == INSTRUCTION_TEST_ZERO) + bcatcstr(res, "!"); + } + + TranslateOperand(res, &inst.asOperands[0], flag); + char *str = bstr2cstr(res, '\0'); + oss << str; + bcstrfree(str); + bdestroy(res); + if (argType != SVT_BOOL) + oss << "!=0"; + + std::string ress = oss.str(); + // Sanity checks: no arrays, no matrices + if (ress.find('[') != std::string::npos) + return false; + if (ress.find("hlslcc_mtx") != std::string::npos) + return false; + Base64Encode(ress, inst.m_StaticBranchName); + } + return true; } void ToGLSL::IdentifyStaticBranches(ShaderPhase *psPhase) { - for (std::vector::iterator itr = psPhase->psInst.begin(); itr != psPhase->psInst.end(); itr++) - { - Instruction &i = *itr; - - if (!i.IsConditionalBranchInstruction()) - continue; - - // Simple case, direct conditional branch - if (i.asOperands[0].eType == OPERAND_TYPE_CONSTANT_BUFFER) - { - i.m_StaticBranchCondition = NULL; - if (BuildStaticBranchNameForInstruction(i)) - { - 
psPhase->m_StaticBranchInstructions.push_back(&i); - i.m_IsStaticBranch = true; - } - } - // Indirect, comparison via another instruction - if (i.asOperands[0].eType == OPERAND_TYPE_TEMP) - { - // Check that the temp only has one visible definition - if (i.asOperands[0].m_Defines.size() == 1) - { - // ...and that it only uses constant buffers and immediates - - Instruction &def = *i.asOperands[0].m_Defines[0].m_Inst; - bool isStatic = true; - for (uint32_t k = def.ui32FirstSrc; k < def.ui32NumOperands; k++) - { - Operand &o = def.asOperands[k]; - if (!(o.eType == OPERAND_TYPE_CONSTANT_BUFFER || o.eType == OPERAND_TYPE_IMMEDIATE32)) - { - isStatic = false; - break; - } - // Also check that the constant buffer access is "simple" - if (o.eType == OPERAND_TYPE_CONSTANT_BUFFER) - { - if (o.m_SubOperands[0].get() || o.m_SubOperands[1].get()) - { - isStatic = false; - break; - } - } - } - if (isStatic) - { - i.m_StaticBranchCondition = &def; - if (BuildStaticBranchNameForInstruction(i)) - { - psPhase->m_StaticBranchInstructions.push_back(&i); - i.m_IsStaticBranch = true; - } - else - i.m_StaticBranchCondition = NULL; - } - } - } - } + for (std::vector::iterator itr = psPhase->psInst.begin(); itr != psPhase->psInst.end(); itr++) + { + Instruction &i = *itr; + + if (!i.IsConditionalBranchInstruction()) + continue; + + // Simple case, direct conditional branch + if (i.asOperands[0].eType == OPERAND_TYPE_CONSTANT_BUFFER) + { + i.m_StaticBranchCondition = NULL; + if (BuildStaticBranchNameForInstruction(i)) + { + psPhase->m_StaticBranchInstructions.push_back(&i); + i.m_IsStaticBranch = true; + } + } + // Indirect, comparison via another instruction + if (i.asOperands[0].eType == OPERAND_TYPE_TEMP) + { + // Check that the temp only has one visible definition + if (i.asOperands[0].m_Defines.size() == 1) + { + // ...and that it only uses constant buffers and immediates + + Instruction &def = *i.asOperands[0].m_Defines[0].m_Inst; + bool isStatic = true; + for (uint32_t k = 
def.ui32FirstSrc; k < def.ui32NumOperands; k++) + { + Operand &o = def.asOperands[k]; + if (!(o.eType == OPERAND_TYPE_CONSTANT_BUFFER || o.eType == OPERAND_TYPE_IMMEDIATE32)) + { + isStatic = false; + break; + } + // Also check that the constant buffer access is "simple" + if (o.eType == OPERAND_TYPE_CONSTANT_BUFFER) + { + if (o.m_SubOperands[0].get() || o.m_SubOperands[1].get()) + { + isStatic = false; + break; + } + } + } + if (isStatic) + { + i.m_StaticBranchCondition = &def; + if (BuildStaticBranchNameForInstruction(i)) + { + psPhase->m_StaticBranchInstructions.push_back(&i); + i.m_IsStaticBranch = true; + } + else + i.m_StaticBranchCondition = NULL; + } + } + } + } } diff --git a/src/toGLSLDeclaration.cpp b/src/toGLSLDeclaration.cpp index 3130f61..ca521f4 100644 --- a/src/toGLSLDeclaration.cpp +++ b/src/toGLSLDeclaration.cpp @@ -28,149 +28,146 @@ using namespace HLSLcc; void ToGLSL::DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix) { - bstring glsl = *psContext->currentGLSLString; + bstring glsl = *psContext->currentGLSLString; - if (psType->Class == SVC_STRUCT) - { - bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? "UNITY_UNIFORM " : "", varName, varName); - if (psType->Elements > 1) - { + if (psType->Class == SVC_STRUCT) + { + bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? 
"UNITY_UNIFORM " : "", varName, varName); + if (psType->Elements > 1) + { if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) { bformata(glsl, "[" UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "]"); m_NeedUnityInstancingArraySizeDecl = true; } else - bformata(glsl, "[%d]", psType->Elements); - } - } - else if(psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS) - { - if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - { - // Translate matrices into vec4 arrays - bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, varName); - uint32_t elemCount = (psType->Class == SVC_MATRIX_COLUMNS ? psType->Columns : psType->Rows); - if (psType->Elements > 1) - { - elemCount *= psType->Elements; - } - bformata(glsl, "[%d]", elemCount); - } - else - { - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), varName); - if (psType->Elements > 1) - { - bformata(glsl, "[%d]", psType->Elements); - } - } - } - else - if (psType->Class == SVC_VECTOR && psType->Columns > 1) - { - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), varName); - - if(psType->Elements > 1) - { - bformata(glsl, "[%d]", psType->Elements); - } - } - else - if ((psType->Class == SVC_SCALAR) || - (psType->Class == SVC_VECTOR && psType->Columns == 1)) - { - if (psType->Type == SVT_BOOL) - { - //Use int instead of bool. - //Allows implicit conversions to integer and - //bool consumes 4-bytes in HLSL and GLSL anyway. - ((ShaderVarType *)psType)->Type = SVT_INT; - } - - bformata(glsl, "\t%s%s %s", addUniformPrefix ? 
"UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), varName); - - if(psType->Elements > 1) - { - bformata(glsl, "[%d]", psType->Elements); - } - } - if(unsizedArray) - bformata(glsl, "[]"); - bformata(glsl, ";\n"); + bformata(glsl, "[%d]", psType->Elements); + } + } + else if (psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS) + { + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, varName); + uint32_t elemCount = (psType->Class == SVC_MATRIX_COLUMNS ? psType->Columns : psType->Rows); + if (psType->Elements > 1) + { + elemCount *= psType->Elements; + } + bformata(glsl, "[%d]", elemCount); + } + else + { + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), varName); + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + } + else if (psType->Class == SVC_VECTOR && psType->Columns > 1) + { + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), varName); + + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + else if ((psType->Class == SVC_SCALAR) || + (psType->Class == SVC_VECTOR && psType->Columns == 1)) + { + if (psType->Type == SVT_BOOL) + { + //Use int instead of bool. + //Allows implicit conversions to integer and + //bool consumes 4-bytes in HLSL and GLSL anyway. + ((ShaderVarType *)psType)->Type = SVT_INT; + } + + bformata(glsl, "\t%s%s %s", addUniformPrefix ? 
"UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), varName); + + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + if (unsizedArray) + bformata(glsl, "[]"); + bformata(glsl, ";\n"); } //In GLSL embedded structure definitions are not supported. void ToGLSL::PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType) { - bstring glsl = *psContext->currentGLSLString; - uint32_t i; + bstring glsl = *psContext->currentGLSLString; + uint32_t i; - for(i=0; iMemberCount; ++i) - { - if(psType->Members[i].Class == SVC_STRUCT) - { - PreDeclareStructType(psType->Members[i].name, &psType->Members[i]); - } - } + for (i = 0; i < psType->MemberCount; ++i) + { + if (psType->Members[i].Class == SVC_STRUCT) + { + PreDeclareStructType(psType->Members[i].name, &psType->Members[i]); + } + } - if(psType->Class == SVC_STRUCT) - { - //Not supported at the moment - ASSERT(name != "$Element"); + if (psType->Class == SVC_STRUCT) + { + //Not supported at the moment + ASSERT(name != "$Element"); - bformata(glsl, "struct %s_Type {\n", name.c_str()); + bformata(glsl, "struct %s_Type {\n", name.c_str()); - for(i=0; iMemberCount; ++i) - { - ASSERT(psType->Members.size() != 0); + for (i = 0; i < psType->MemberCount; ++i) + { + ASSERT(psType->Members.size() != 0); - DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0); - } + DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0); + } - bformata(glsl, "};\n"); - } + bformata(glsl, "};\n"); + } } - static const char* GetInterpolationString(INTERPOLATION_MODE eMode, GLLang lang) { - switch(eMode) - { - case INTERPOLATION_CONSTANT: - { - return "flat "; - } - case INTERPOLATION_LINEAR: - { - return ""; - } - case INTERPOLATION_LINEAR_CENTROID: - { - return "centroid "; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - { - return lang <= LANG_ES_310 ? 
"" : "noperspective "; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - { - return lang <= LANG_ES_310 ? "centroid " : "noperspective centroid "; - } - case INTERPOLATION_LINEAR_SAMPLE: - { - return "sample "; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - { - return lang <= LANG_ES_310 ? "" : "noperspective sample "; - } - default: - { - return ""; - } - } + switch (eMode) + { + case INTERPOLATION_CONSTANT: + { + return "flat "; + } + case INTERPOLATION_LINEAR: + { + return ""; + } + case INTERPOLATION_LINEAR_CENTROID: + { + return "centroid "; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + return lang <= LANG_ES_310 ? "" : "noperspective "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + return lang <= LANG_ES_310 ? "centroid " : "noperspective centroid "; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + return "sample "; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + return lang <= LANG_ES_310 ? "" : "noperspective sample "; + } + default: + { + return ""; + } + } } static void DeclareInput( @@ -178,3337 +175,3385 @@ static void DeclareInput( const Declaration* psDecl, const char* Interpolation, const char* StorageQualifier, const char* Precision, int iNumComponents, OPERAND_INDEX_DIMENSION eIndexDim, const char* InputName, const uint32_t ui32CompMask) { - Shader* psShader = psContext->psShader; - bstring glsl = *psContext->currentGLSLString; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - const ShaderInfo::InOutSignature *psSig = NULL; - - // This falls within the specified index ranges. 
The default is 0 if no input range is specified - - if (regSpace == 0) - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - - ASSERT(psSig != NULL); - - // No need to declare input pos 0 on HS control point phases, it's always position - // Also no point in declaring the builtins - if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) - { - if (regSpace == 0) - { - if (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) - return; - } - } - - if((ui32CompMask & ~psShader->acInputDeclared[regSpace][ui32Reg]) != 0) - { - const char* vecType = "vec"; - const char* scalarType = "float"; - - switch(psSig->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - vecType = "uvec"; - scalarType = "uint"; - break; - } - case INOUT_COMPONENT_SINT32: - { - vecType = "ivec"; - scalarType = "int"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } - default: - { - ASSERT(0); - break; - } - } - - if(psContext->psDependencies) - { - if(psShader->eShaderType == PIXEL_SHADER) - { - psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); - } - } - - std::string locationQualifier = ""; - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - ((psContext->flags & HLSLCC_FLAG_NVN_TARGET) && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions))) - { - bool addLocation = false; - - // Add locations to vertex shader inputs unless disabled in flags - if (psShader->eShaderType == VERTEX_SHADER && !(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) - addLocation = true; - - // Add intra-shader locations if requested in flags - if (psShader->eShaderType != VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS)) - addLocation = true; 
- - if (addLocation) - { - std::ostringstream oss; - oss << "layout(location = " << psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true) << ") "; - locationQualifier = oss.str(); - } - } - - psShader->acInputDeclared[regSpace][ui32Reg] = (char)psSig->ui32Mask; - - // Do the reflection report on vertex shader inputs - if (psShader->eShaderType == VERTEX_SHADER) - { - psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true)); - } - - switch (eIndexDim) - { - case INDEX_2D: - { - if(iNumComponents == 1) - { - const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; - const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; - - psContext->psShader->abScalarInput[regSpace][regNum] |= (int)ui32CompMask; - - if(psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) - bformata(glsl, "%s%s%s %s %s %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); - else - bformata(glsl, "%s%s%s %s %s %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName, arraySize); - } - else - { - if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) - bformata(glsl, "%s%s%s %s %s%d %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - else - bformata(glsl, "%s%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName, - psDecl->asOperands[0].aui32ArraySizes[0]); - } - break; - } - default: - { - if(iNumComponents == 1) - { - psContext->psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; - - bformata(glsl, "%s%s%s %s %s %s;\n", locationQualifier.c_str(), Interpolation, 
StorageQualifier, Precision, scalarType, InputName); - } - else - { - if(psShader->aIndexedInput[regSpace][ui32Reg] > 0) - { - bformata(glsl, "%s%s%s %s %s%d %s", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - if (psShader->eShaderType == HULL_SHADER) - bcatcstr(glsl, "[];\n"); - else - bcatcstr(glsl, ";\n"); - } - else - { - if (psShader->eShaderType == HULL_SHADER) - bformata(glsl, "%s%s%s %s %s%d %s[];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - else - bformata(glsl, "%s%s%s %s %s%d %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - } - } - break; - } - } - } -} + Shader* psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + const ShaderInfo::InOutSignature *psSig = NULL; + + // This falls within the specified index ranges. 
The default is 0 if no input range is specified + + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + ASSERT(psSig != NULL); + + // No need to declare input pos 0 on HS control point phases, it's always position + // Also no point in declaring the builtins + if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + if (regSpace == 0) + { + if ((psSig->semanticName == "POS" || psSig->semanticName == "SV_Position") && psSig->ui32SemanticIndex == 0) + return; + } + } -bool ToGLSL::RenderTargetDeclared(uint32_t input) -{ - if (m_DeclaredRenderTarget.find(input) != m_DeclaredRenderTarget.end()) - return true; + if ((ui32CompMask & ~psShader->acInputDeclared[regSpace][ui32Reg]) != 0) + { + const char* vecType = "vec"; + const char* scalarType = "float"; - m_DeclaredRenderTarget.insert(input); - return false; -} + switch (psSig->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + vecType = "uvec"; + scalarType = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + vecType = "ivec"; + scalarType = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + { + ASSERT(0); + break; + } + } -void ToGLSL::AddBuiltinInput(const Declaration* psDecl, const char* builtinName) -{ - Shader* psShader = psContext->psShader; const Operand* op = &psDecl->asOperands[0]; + if (psContext->psDependencies) + { + if (psShader->eShaderType == PIXEL_SHADER) + { + psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); + } + } - const int regSpace = op->GetRegisterSpace(psContext); ASSERT(regSpace == 0); - const uint32_t ui32Reg = op->ui32RegisterNumber, ui32CompMask = op->ui32CompMask; + std::string locationQualifier = ""; - // we need to at least mark if they are scalars or not (as we might need to 
use vector ctor) - if(op->GetNumInputElements(psContext) == 1) - psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; -} + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + ((psContext->flags & HLSLCC_FLAG_NVN_TARGET) && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions))) + { + bool addLocation = false; + // Add locations to vertex shader inputs unless disabled in flags + if (psShader->eShaderType == VERTEX_SHADER && !(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) + addLocation = true; -void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - - if (eSpecialName != NAME_CLIP_DISTANCE && eSpecialName != NAME_CULL_DISTANCE) - return; - - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - - if(psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], arrayElements ? arrayElements : 1)) - { - const ShaderInfo::InOutSignature* psSignature = NULL; - - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - 0, - &psSignature); - psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - glsl = *psContext->currentGLSLString; - psContext->indent++; - if(arrayElements) - { - - } - else if((eSpecialName == NAME_CLIP_DISTANCE || eSpecialName == NAME_CULL_DISTANCE) && psContext->psShader->eShaderType != HULL_SHADER) - { - // Case 828454 : For some reason DX compiler seems to inject clip/cull distance declaration to the hull shader sometimes - // even though it's not used at all, and overlaps some completely unrelated patch constant declarations. We'll just ignore this now. 
- // Revisit this if this actually pops up elsewhere. - - // cull/clip distance are pretty similar (the only real difference is extension name (and functionality, but we dont care here)) - int max = psDecl->asOperands[0].GetMaxComponent(); - - if (IsESLanguage(psShader->eTargetLanguage)) - psContext->RequireExtension("GL_EXT_clip_cull_distance"); - else if(eSpecialName == NAME_CULL_DISTANCE) - psContext->RequireExtension("GL_ARB_cull_distance"); // TODO: it is builtin in GLSL 4.5 (should we care?) - const char* glName = eSpecialName == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; - - int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; - const char* swizzle[] = {".x", ".y", ".z", ".w"}; - - ASSERT(psSignature!=NULL); - const int index = psSignature->ui32SemanticIndex; - - //Clip/Cull distance can be spread across 1 or 2 outputs (each no more than a vec4). - //Some examples: - //float4 clip[2] : SV_ClipDistance; //8 clip distances - //float3 clip[2] : SV_ClipDistance; //6 clip distances - //float4 clip : SV_ClipDistance; //4 clip distances - //float clip : SV_ClipDistance; //1 clip distance. - - //In GLSL the clip/cull distance built-in is an array of up to 8 floats. - //So vector to array conversion needs to be done here. - int multiplier = 1; - if(index == 1) - { - const ShaderInfo::InOutSignature* psFirstClipSignature; - if (psShader->sInfo.GetOutputSignatureFromSystemValue(eSpecialName, 1, &psFirstClipSignature)) - { - if(psFirstClipSignature->ui32Mask & (1 << 3)) multiplier = 4; - else if(psFirstClipSignature->ui32Mask & (1 << 2)) multiplier = 3; - else if(psFirstClipSignature->ui32Mask & (1 << 1)) multiplier = 2; - } - } - - // Add a specially crafted comment so runtime knows to enable clip planes. 
- // We may end up doing 2 of these, so at runtime OR the results - uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); - if(index != 0) - clipmask <<= multiplier; - bformata(psContext->glsl, "// HLSLcc_%sDistances_%x\n", glName, clipmask); - - psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; - bformata(psContext->glsl, "vec4 phase%d_gl%sDistance%d;\n", psContext->currentPhase, glName, index); - - for(int i=0; iAddIndentation(); - bformata(glsl, "%s[%d] = (", builtinName, i + multiplier*index); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - if(applySwizzle) bformata(glsl, ")%s;\n", swizzle[i]); - else bformata(glsl, ");\n"); - } - } - psContext->indent--; - psContext->currentGLSLString = &psContext->glsl; - } -} + // Add intra-shader locations if supported + if (psShader->eShaderType != VERTEX_SHADER) + addLocation = true; -void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precision) -{ - const Operand *psOperand = &psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - bstring glsl = *psContext->currentGLSLString; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if this is indexed? 
- ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; - int comp = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); - - psContext->AddIndentation(); - bformata(glsl, "%s vec4 phase%d_Output%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - psPhase->hasPostShaderCode = 1; - psContext->currentGLSLString = &psPhase->postShaderCode; - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - uint32_t mask, i; - psSig = NULL; - if (regSpace == 0) - psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - // The register isn't necessarily packed full. Continue with the next component. - if (psSig == NULL) - { - comp++; - continue; - } - - numComps = GetNumberBitsSet(psSig->ui32Mask); - mask = psSig->ui32Mask; - - ((Operand *)psOperand)->ui32CompMask = 1 << comp; - psContext->AddIndentation(); - TranslateOperand(psOperand, TO_FLAG_NAME_ONLY); - - bcatcstr(psPhase->postShaderCode, " = "); - - if (psSig->eComponentType == INOUT_COMPONENT_SINT32) - { - bformata(psPhase->postShaderCode, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToInt(" : "int("); - hasCast = 1; - } - else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) - { - bformata(psPhase->postShaderCode, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"floatBitsToUint(" : "int("); - hasCast = 1; - } - bformata(psPhase->postShaderCode, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - // Print out mask - for (i = 0; i < 4; i++) - { - if ((mask & (1 << i)) == 0) - continue; - - bformata(psPhase->postShaderCode, "%c", "xyzw"[i]); - } - - if (hasCast) - bcatcstr(psPhase->postShaderCode, ")"); - comp += numComps; - bcatcstr(psPhase->postShaderCode, ";\n"); - } - - psContext->currentGLSLString = &psContext->glsl; - - ((Operand *)psOperand)->ui32CompMask = origMask; - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } -} + if (addLocation) + { + std::ostringstream oss; + oss << "layout(location = " << psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true) << ") "; + locationQualifier = oss.str(); + } + } -void ToGLSL::AddUserOutput(const Declaration* psDecl) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - if(psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) - { - const Operand* psOperand = &psDecl->asOperands[0]; - const char* Precision = ""; - int iNumComponents; - bstring type = NULL; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - - const ShaderInfo::InOutSignature* psSignature = NULL; - - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister( - ui32Reg, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); - - if (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 && 
psContext->psShader->eShaderType == VERTEX_SHADER) - return; - - iNumComponents = GetNumberBitsSet(psSignature->ui32Mask); - if (iNumComponents == 1) - psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - if (iNumComponents > 1) - type = bformat("uvec%d", iNumComponents); - else - type = bformat("uint"); - break; - } - case INOUT_COMPONENT_SINT32: - { - if (iNumComponents > 1) - type = bformat("ivec%d", iNumComponents); - else - type = bformat("int"); - break; - } - case INOUT_COMPONENT_FLOAT32: - { - if (iNumComponents > 1) - type = bformat("vec%d", iNumComponents); - else - type = bformat("float"); - break; - } - default: - ASSERT(0); + psShader->acInputDeclared[regSpace][ui32Reg] = (char)psSig->ui32Mask; + + // Do the reflection report on vertex shader inputs + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true)); + } + + switch (eIndexDim) + { + case INDEX_2D: + { + if (iNumComponents == 1) + { + const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; + const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; + + psContext->psShader->abScalarInput[regSpace][regNum] |= (int)ui32CompMask; + + if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) + bformata(glsl, "%s%s%s %s %s %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); + else + bformata(glsl, "%s%s%s %s %s %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName, arraySize); + } + else + { + if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) + bformata(glsl, "%s%s%s %s %s%d %s [];\n", 
locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + else + bformata(glsl, "%s%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName, + psDecl->asOperands[0].aui32ArraySizes[0]); + } break; - } - - if(HavePrecisionQualifiers(psContext)) - { - switch(psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp "; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump "; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? "lowp " : "mediump "; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump "; - //type = "ivec"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump "; - //type = "uvec"; - break; - } - } - } - - switch(psShader->eShaderType) - { - case PIXEL_SHADER: - { - switch(psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH: - { - if (psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) - { - bcatcstr(psContext->extensions, "#ifdef GL_EXT_frag_depth\n"); - bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); - bcatcstr(psContext->extensions, "#endif\n"); - } - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - { - psContext->EnableExtension("GL_ARB_conservative_depth"); - bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); - bcatcstr(glsl, "layout (depth_greater) out float gl_FragDepth;\n"); - bcatcstr(glsl, "#endif\n"); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - psContext->EnableExtension("GL_ARB_conservative_depth"); - bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); - bcatcstr(glsl, "layout (depth_less) out float gl_FragDepth;\n"); - bcatcstr(glsl, "#endif\n"); - break; - } - default: - { - uint32_t 
renderTarget = psDecl->asOperands[0].ui32RegisterNumber; - - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - if (psShader->eTargetLanguage == LANG_ES_100 && renderTarget > 0) - psContext->EnableExtension("GL_EXT_draw_buffers"); - - bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && - psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); - - if(WriteToFragData(psContext->psShader->eTargetLanguage)) - { - bformata(glsl, "#define %s gl_FragData[%d]\n", OutputName, renderTarget); - } - else - { - if (!RenderTargetDeclared(renderTarget)) - { - bstring layoutQualifier = bformat(""); - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) - { - uint32_t index = 0; - - if((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) - { - if(renderTarget > 0) - { - renderTarget = 0; - index = 1; - } - layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); - } - else - { - layoutQualifier = bformat("layout(location = %d) ", renderTarget); - } - } - - if (haveFramebufferFetch) - { - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "%sinout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "%sout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "%sout %s%s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - } - } - break; - } - } - break; - } - case VERTEX_SHADER: - case GEOMETRY_SHADER: - case 
DOMAIN_SHADER: - case HULL_SHADER: - { - const char* Interpolation = ""; - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%s%d", psContext->outputPrefix, regSpace == 0 ? "" : "patch", psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - if (psShader->eShaderType == VERTEX_SHADER) - { - if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || - psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation - { - Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); - } - else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input - { - Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); - } - } - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) && (psContext->flags & HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS)) - { - bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false)); - } - - if(InOutSupported(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->eShaderType == HULL_SHADER) - { - // In Hull shaders outputs are either per-vertex (and need []) or per-patch (need 'out patch') - if (regSpace == 0) - bformata(glsl, "%sout %s%s %s[];\n", Interpolation, Precision, type->data, OutputName); - else - bformata(glsl, "patch %sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); - } - else - bformata(glsl, "%sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); - } - else - { - bformata(glsl, "%svarying %s%s %s;\n", Interpolation, Precision, type->data, OutputName); - } - - break; - } + } default: - ASSERT(0); + { + if 
(iNumComponents == 1) + { + psContext->psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; + + bformata(glsl, "%s%s%s %s %s %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); + } + else + { + if (psShader->aIndexedInput[regSpace][ui32Reg] > 0) + { + bformata(glsl, "%s%s%s %s %s%d %s", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + if (psShader->eShaderType == HULL_SHADER) + bcatcstr(glsl, "[];\n"); + else + bcatcstr(glsl, ";\n"); + } + else + { + if (psShader->eShaderType == HULL_SHADER) + bformata(glsl, "%s%s%s %s %s%d %s[];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + else + bformata(glsl, "%s%s%s %s %s%d %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + } + } break; - - } - HandleOutputRedirect(psDecl, Precision); - bdestroy(type); - } - + } + } + } } -void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl) +bool ToGLSL::RenderTargetDeclared(uint32_t input) { - uint32_t i; - - bool skipUnused = false; - - if((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") - skipUnused = true; - - - std::string cbName = psCBuf->name; - if(cbName == "$Globals") - { - // Need to tweak Globals struct name to prevent clashes between shader stages - char prefix = 'A'; - switch (psContext->psShader->eShaderType) - { - default: - ASSERT(0); - break; - case COMPUTE_SHADER: - prefix = 'C'; - break; - case VERTEX_SHADER: - prefix = 'V'; - break; - case PIXEL_SHADER: - prefix = 'P'; - break; - case GEOMETRY_SHADER: - prefix = 'G'; - break; - case HULL_SHADER: - prefix = 'H'; - break; - case DOMAIN_SHADER: - prefix = 'D'; - break; - } + if (m_DeclaredRenderTarget.find(input) != m_DeclaredRenderTarget.end()) + return true; - cbName[0] = 
prefix; - } - - for(i=0; i < psCBuf->asVars.size(); ++i) - { - if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; - - PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); - } - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n"); - - /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(cbName, false, 1); - bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.first, binding.second); - } - else - { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(binding = %d, std140) ", ui32BindingPoint); - else - bcatcstr(glsl, "layout(std140) "); - } - - bformata(glsl, "uniform %s {\n", cbName.c_str()); - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#else\n#define UNITY_UNIFORM uniform\n#endif\n"); - - for(i=0; i < psCBuf->asVars.size(); ++i) - { - if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; - - DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), - &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? 
true : false); - } - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n"); - - - if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) - { - std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); - bformata(glsl, "} %s;\n", instanceName.c_str()); - } - else - bcatcstr(glsl, "};\n"); - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n"); + m_DeclaredRenderTarget.insert(input); + return false; } -static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, - const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, - const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl) +void ToGLSL::AddBuiltinInput(const Declaration* psDecl, const char* builtinName) { - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - bstring BufNamebstr = bfromcstr(""); - // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. - if (!isUAV && !isVulkan) - ui32BindingPoint = psContext->psShader->aui32StructuredBufferBindingPoints[psContext->psShader->ui32CurrentStructuredBufferIndex++]; - - ResourceName(BufNamebstr, psContext, isUAV ? 
RGROUP_UAV : RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); - - char *btmp = bstr2cstr(BufNamebstr, '\0'); - std::string BufName = btmp; - bcstrfree(btmp); - bdestroy(BufNamebstr); - - // Declare the struct type for structured buffers - if (!isRaw) - bformata(glsl, " struct %s_type {\n\tuint[%d] value;\n};\n\n", BufName.c_str(), stride / 4); - - if (isVulkan) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(BufName); - bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.first, binding.second); - } - else - { - bformata(glsl, "layout(std430, binding = %d) ", ui32BindingPoint); - } - - if (ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) - bcatcstr(glsl, "coherent "); - - if (!isUAV) - bcatcstr(glsl, "readonly "); - - bformata(glsl, "buffer %s {\n\t", BufName.c_str()); - - if (hasEmbeddedCounter) - bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); - - if (isRaw) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "uint"); - else - bcatcstr(glsl, "int"); - } - else - bformata(glsl, "%s_type", BufName.c_str()); - - bformata(glsl, " %s_buf[];\n};\n", BufName.c_str()); - + Shader* psShader = psContext->psShader; + const Operand* psOperand = &psDecl->asOperands[0]; + const int regSpace = psOperand->GetRegisterSpace(psContext); + ASSERT(regSpace == 0); + + // we need to at least mark if they are scalars or not (as we might need to use vector ctor) + if (psOperand->GetNumInputElements(psContext) == 1) + psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] |= (int)psOperand->ui32CompMask; } -void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, - const ConstantBuffer* psCBuf, const Operand* psOperand, - bstring glsl) +void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName) { - uint32_t i; - int useGlobalsStruct = 1; - bool skipUnused = false; - - if((psContext->flags & 
HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) && psCBuf->name[0] == '$') - useGlobalsStruct = 0; - - if((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") - skipUnused = true; - - if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) == 0) - useGlobalsStruct = 0; - + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + + if (eSpecialName != NAME_CLIP_DISTANCE && eSpecialName != NAME_CULL_DISTANCE) + return; + + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + + if (psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], arrayElements ? arrayElements : 1)) + { + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + 0, + &psSignature); + psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + glsl = *psContext->currentGLSLString; + psContext->indent++; + if (arrayElements) + { + } + else if ((eSpecialName == NAME_CLIP_DISTANCE || eSpecialName == NAME_CULL_DISTANCE) && psContext->psShader->eShaderType != HULL_SHADER) + { + // Case 828454 : For some reason DX compiler seems to inject clip/cull distance declaration to the hull shader sometimes + // even though it's not used at all, and overlaps some completely unrelated patch constant declarations. We'll just ignore this now. + // Revisit this if this actually pops up elsewhere. 
+ + // cull/clip distance are pretty similar (the only real difference is extension name (and functionality, but we dont care here)) + int max = psDecl->asOperands[0].GetMaxComponent(); + + if (IsESLanguage(psShader->eTargetLanguage)) + psContext->RequireExtension("GL_EXT_clip_cull_distance"); + else if (eSpecialName == NAME_CULL_DISTANCE) + psContext->RequireExtension("GL_ARB_cull_distance"); // TODO: it is builtin in GLSL 4.5 (should we care?) + const char* glName = eSpecialName == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; + + int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; + const char* swizzle[] = {".x", ".y", ".z", ".w"}; + + ASSERT(psSignature != NULL); + const int index = psSignature->ui32SemanticIndex; + + //Clip/Cull distance can be spread across 1 or 2 outputs (each no more than a vec4). + //Some examples: + //float4 clip[2] : SV_ClipDistance; //8 clip distances + //float3 clip[2] : SV_ClipDistance; //6 clip distances + //float4 clip : SV_ClipDistance; //4 clip distances + //float clip : SV_ClipDistance; //1 clip distance. + + //In GLSL the clip/cull distance built-in is an array of up to 8 floats. + //So vector to array conversion needs to be done here. + int multiplier = 1; + if (index == 1) + { + const ShaderInfo::InOutSignature* psFirstClipSignature; + if (psShader->sInfo.GetOutputSignatureFromSystemValue(eSpecialName, 1, &psFirstClipSignature)) + { + if (psFirstClipSignature->ui32Mask & (1 << 3)) multiplier = 4; + else if (psFirstClipSignature->ui32Mask & (1 << 2)) multiplier = 3; + else if (psFirstClipSignature->ui32Mask & (1 << 1)) multiplier = 2; + } + } + // Add a specially crafted comment so runtime knows to enable clip planes. 
+ // We may end up doing 2 of these, so at runtime OR the results + uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); + if (index != 0) + clipmask <<= multiplier; + bformata(psContext->glsl, "// HLSLcc_%sDistances_%x\n", glName, clipmask); - for(i=0; i < psCBuf->asVars.size(); ++i) - { - if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; + psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; + bformata(psContext->glsl, "vec4 phase%d_gl%sDistance%d;\n", psContext->currentPhase, glName, index); - PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); - } + for (int i = 0; i < max; ++i) + { + psContext->AddIndentation(); + bformata(glsl, "%s[%d] = (", builtinName, i + multiplier * index); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + if (applySwizzle) bformata(glsl, ")%s;\n", swizzle[i]); + else bformata(glsl, ");\n"); + } + } + psContext->indent--; + psContext->currentGLSLString = &psContext->glsl; + } +} - /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - ASSERT(0); // Catch this to see what's going on - std::string bname = "wut"; - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(bname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - } - else - { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(location = %d) ", ui32BindingPoint); - } - if(useGlobalsStruct) - { - bcatcstr(glsl, "uniform struct "); - TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); +void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precision) +{ + const Operand *psOperand = &psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = 
*psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + int comp = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + bformata(glsl, "%s vec4 phase%d_Output%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + uint32_t mask, i; + psSig = NULL; + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - bcatcstr(glsl, "_Type {\n"); - } + // The register isn't necessarily packed full. Continue with the next component. 
+ if (psSig == NULL) + { + comp++; + continue; + } - for(i=0; i < psCBuf->asVars.size(); ++i) - { - if(skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; + numComps = GetNumberBitsSet(psSig->ui32Mask); + mask = psSig->ui32Mask; - if(!useGlobalsStruct) - bcatcstr(glsl, "uniform "); + ((Operand *)psOperand)->ui32CompMask = 1 << comp; + bstring str = GetPostShaderCode(psContext); + TranslateOperand(str, psOperand, TO_FLAG_NAME_ONLY); + bcatcstr(str, " = "); - DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0); - } + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToInt(" : "int("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToUint(" : "int("); + hasCast = 1; + } + bformata(str, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Print out mask + for (i = 0; i < 4; i++) + { + if ((mask & (1 << i)) == 0) + continue; - if(useGlobalsStruct) - { - bcatcstr(glsl, "} "); + bformata(str, "%c", "xyzw"[i]); + } - TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + bcatcstr(str, ";\n"); + } - bcatcstr(glsl, ";\n"); - } + ((Operand *)psOperand)->ui32CompMask = origMask; + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } } -static const char* GetVulkanTextureType(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t ui32RegisterNumber) +void ToGLSL::AddUserOutput(const Declaration* psDecl) { - const ResourceBinding* psBinding = 0; - RESOURCE_RETURN_TYPE eType = 
RETURN_TYPE_UNORM; - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if (found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - } - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itextureBuffer"; - case RETURN_TYPE_UINT: - return "utextureBuffer"; - default: - return "textureBuffer"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture1D"; - case RETURN_TYPE_UINT: - return "utexture1D"; - default: - return "texture1D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2D"; - case RETURN_TYPE_UINT: - return "utexture2D"; - default: - return "texture2D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2DMS"; - case RETURN_TYPE_UINT: - return "utexture2DMS"; - default: - return "texture2DMS"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE3D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture3D"; - case RETURN_TYPE_UINT: - return "utexture3D"; - default: - return "texture3D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBE: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itextureCube"; - case RETURN_TYPE_UINT: - return "utextureCube"; - default: - return "textureCube"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture1DArray"; - case RETURN_TYPE_UINT: - return "utexture1DArray"; - default: - return "texture1DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2DArray"; - case RETURN_TYPE_UINT: - return "utexture2DArray"; - default: - return "texture2DArray"; - } - break; - } - - 
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2DMSArray"; - case RETURN_TYPE_UINT: - return "utexture2DMSArray"; - default: - return "texture2DMSArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itextureCubeArray"; - case RETURN_TYPE_UINT: - return "utextureCubeArray"; - default: - return "textureCubeArray"; - } - break; - } - default: - ASSERT(0); - break; - - } - - return "texture2D"; -} + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + if (psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) + { + const Operand* psOperand = &psDecl->asOperands[0]; + const char* Precision = ""; + int iNumComponents; + bstring type = NULL; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + + const ShaderInfo::InOutSignature* psSignature = NULL; + + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + ui32Reg, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); + + if (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 && psContext->psShader->eShaderType == VERTEX_SHADER) + return; + + iNumComponents = GetNumberBitsSet(psSignature->ui32Mask); + if (iNumComponents == 1) + psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + if (iNumComponents > 1) + type = bformat("uvec%d", iNumComponents); + else + type = bformat("uint"); + break; + } + case INOUT_COMPONENT_SINT32: + { + if (iNumComponents > 1) + type = bformat("ivec%d", iNumComponents); + else + type = bformat("int"); + break; 
+ } + case INOUT_COMPONENT_FLOAT32: + { + if (iNumComponents > 1) + type = bformat("vec%d", iNumComponents); + else + type = bformat("float"); + break; + } + default: + ASSERT(0); + break; + } -// Not static because this is used in toGLSLInstruction.cpp when sampling Vulkan textures -const char* GetSamplerType(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t ui32RegisterNumber) -{ - const ResourceBinding* psBinding = 0; - RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if(found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - } - switch(eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - psContext->RequireExtension("GL_EXT_texture_buffer"); - switch(eType) - { - case RETURN_TYPE_SINT: - return "isamplerBuffer"; - case RETURN_TYPE_UINT: - return "usamplerBuffer"; - default: - return "samplerBuffer"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler1D"; - case RETURN_TYPE_UINT: - return "usampler1D"; - default: - return "sampler1D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2D: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler2D"; - case RETURN_TYPE_UINT: - return "usampler2D"; - default: - return "sampler2D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler2DMS"; - case RETURN_TYPE_UINT: - return "usampler2DMS"; - default: - return "sampler2DMS"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE3D: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler3D"; - case RETURN_TYPE_UINT: - return "usampler3D"; - default: - return "sampler3D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBE: - { - switch(eType) - { - case 
RETURN_TYPE_SINT: - return "isamplerCube"; - case RETURN_TYPE_UINT: - return "usamplerCube"; - default: - return "samplerCube"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler1DArray"; - case RETURN_TYPE_UINT: - return "usampler1DArray"; - default: - return "sampler1DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler2DArray"; - case RETURN_TYPE_UINT: - return "usampler2DArray"; - default: - return "sampler2DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isampler2DMSArray"; - case RETURN_TYPE_UINT: - return "usampler2DMSArray"; - default: - return "sampler2DMSArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - switch(eType) - { - case RETURN_TYPE_SINT: - return "isamplerCubeArray"; - case RETURN_TYPE_UINT: - return "usamplerCubeArray"; - default: - return "samplerCubeArray"; - } - break; - } - default: - ASSERT(0); - break; + if (HavePrecisionQualifiers(psContext)) + { + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp "; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump "; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? 
"lowp " : "mediump "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump "; + //type = "ivec"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump "; + //type = "uvec"; + break; + } + } + } + + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + { + if (psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) + { + bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); + } + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + { + psContext->EnableExtension("GL_ARB_conservative_depth"); + bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(glsl, "layout (depth_greater) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + psContext->EnableExtension("GL_ARB_conservative_depth"); + bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(glsl, "layout (depth_less) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + break; + } + default: + { + uint32_t renderTarget = psDecl->asOperands[0].ui32RegisterNumber; + + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (psShader->eTargetLanguage == LANG_ES_100 && renderTarget > 0) + psContext->EnableExtension("GL_EXT_draw_buffers"); + + bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && + psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); + + if (WriteToFragData(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "#define %s gl_FragData[%d]\n", OutputName, renderTarget); + } + else + { + if 
(!RenderTargetDeclared(renderTarget)) + { + bstring layoutQualifier = bformat(""); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + uint32_t index = 0; + + if ((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + { + if (renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); + } + else + { + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d) ", renderTarget); + } + } + + auto lq = bstr2cstr(layoutQualifier, '\0'); + + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s%s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s%s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); + } + else + bformata(glsl, "%sout %s%s %s;\n", lq, Precision, type->data, OutputName); + + bcstrfree(lq); + bdestroy(layoutQualifier); + } + } + break; + } + } + break; + } + case VERTEX_SHADER: + case GEOMETRY_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: + { + const char* Interpolation = ""; + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%s%d", psContext->outputPrefix, regSpace == 0 ? 
"" : "patch", psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (psShader->eShaderType == VERTEX_SHADER) + { + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input + { + Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); + } + } - } + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false)); + } - return "sampler2D"; -} + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + // In Hull shaders outputs are either per-vertex (and need []) or per-patch (need 'out patch') + if (regSpace == 0) + bformata(glsl, "%sout %s%s %s[];\n", Interpolation, Precision, type->data, OutputName); + else + bformata(glsl, "patch %sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + else + bformata(glsl, "%sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + else + { + bformata(glsl, "%svarying %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } -static const char *GetSamplerPrecision(const HLSLCrossCompilerContext *psContext, REFLECT_RESOURCE_PRECISION ePrec) -{ - if (!HavePrecisionQualifiers(psContext)) - return " "; - - switch (ePrec) - { - default: - case REFLECT_RESOURCE_PRECISION_UNKNOWN: - case 
REFLECT_RESOURCE_PRECISION_LOWP: - return EmitLowp(psContext) ? "lowp " : "mediump "; - case REFLECT_RESOURCE_PRECISION_HIGHP: - return "highp "; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return "mediump "; - } + break; + } + default: + ASSERT(0); + break; + } + HandleOutputRedirect(psDecl, Precision); + bdestroy(type); + } } -static void TranslateVulkanResource(HLSLCrossCompilerContext* psContext, const Declaration* psDecl) + +void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl) { - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; + uint32_t i; - const ResourceBinding *psBinding = NULL; - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - ASSERT(psBinding != NULL); + bool skipUnused = false; - const char *samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + if ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") + skipUnused = true; - const char* samplerTypeName = GetVulkanTextureType(psContext, - psDecl->value.eResourceDimension, - psDecl->asOperands[0].ui32RegisterNumber); - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, " "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); + std::string cbName = psCBuf->name; + if (cbName == "$Globals") + { + // Need to tweak Globals struct name to prevent clashes between shader stages + char prefix = 'A'; + switch (psContext->psShader->eShaderType) + { + default: + ASSERT(0); + 
break; + case COMPUTE_SHADER: + prefix = 'C'; + break; + case VERTEX_SHADER: + prefix = 'V'; + break; + case PIXEL_SHADER: + prefix = 'P'; + break; + case GEOMETRY_SHADER: + prefix = 'G'; + break; + case HULL_SHADER: + prefix = 'H'; + break; + case DOMAIN_SHADER: + prefix = 'D'; + break; + } + cbName[0] = prefix; + } + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); + } + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n"); + + /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(cbName, false, 1); + bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.first, binding.second); + } + else + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(binding = %d, std140) ", ui32BindingPoint); + else + bcatcstr(glsl, "layout(std140) "); + } + + bformata(glsl, "uniform %s {\n", cbName.c_str()); + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#else\n#define UNITY_UNIFORM uniform\n#endif\n"); + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), + &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? 
true : false); + } + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#ifndef HLSLCC_DISABLE_UNIFORM_BUFFERS\n"); + + + if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + bformata(glsl, "} %s;\n", instanceName.c_str()); + } + else + bcatcstr(glsl, "};\n"); + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#endif\n#undef UNITY_UNIFORM\n"); } -static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, uint32_t samplerCanDoShadowCmp) +bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, const Operand* psOperand) { - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - const char *samplerPrecision = NULL; - std::set::iterator i; - - const char* samplerTypeName = GetSamplerType(psContext, - psDecl->value.eResourceDimension, - psDecl->asOperands[0].ui32RegisterNumber); - - if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_TEXTURECUBEARRAY - && !HaveCubemapArray(psContext->psShader->eTargetLanguage)) - { - // Need to enable extension (either OES or ARB), but we only need to add it once - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - { - psContext->EnableExtension("GL_OES_texture_cube_map_array"); - psContext->EnableExtension("GL_EXT_texture_cube_map_array"); - } - else - psContext->RequireExtension("GL_ARB_texture_cube_map_array"); - } - - if (psContext->psShader->eTargetLanguage == LANG_ES_100 && samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - psContext->EnableExtension("GL_EXT_shadow_samplers"); - } - - const ResourceBinding *psBinding = NULL; - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - ASSERT(psBinding != NULL); - - samplerPrecision = GetSamplerPrecision(psContext, psBinding ? 
psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - - if (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) - { - if(samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) - { - std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 1); - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, "Shadow "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } - } - for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) - { - std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 0); - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, " "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } - } - - if(samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - //Create shadow and non-shadow sampler. - //HLSL does not have separate types for depth compare, just different functions. - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); - - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, "Shadow "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } - - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); - - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, " "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); + // with cases like: RWStructuredBuffer myBuffer; /*...*/ AtomicMin(myBuffer[0].x , myInt); + // if we translate RWStructuredBuffer template type to uint, incorrect version of the function might be called ( AtomicMin(uint..) instead of AtomicMin(int..) 
) + // we try to avoid this case by using integer type in those cases + if (psContext && psOperand) + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + if (!isVulkan) + { + if (psContext->psShader && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + uint32_t ui32BindingPoint = psOperand->ui32RegisterNumber; + const ResourceBinding* psBinding = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32BindingPoint, &psBinding); + if (psBinding) + { + const ConstantBuffer* psBuffer = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); + if (psBuffer && psBuffer->asVars.size() == 1 && psBuffer->asVars[0].sType.Type == SVT_INT /*&& psContext->IsSwitch()*/) + return true; + } + } + } + } + return false; } -void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precision) +static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, + const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, + const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl) { - Operand *psOperand = (Operand *)&psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - bstring glsl = *psContext->currentGLSLString; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0) - { - if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - needsRedirect = 1; - } - else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if 
this is indexed? - ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; - int needsLooping = 0; - int i = 0; - uint32_t origArraySize = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); - - psContext->AddIndentation(); - // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) - if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) - { - // The count is actually stored in psOperand->aui32ArraySizes[0] - origArraySize = psOperand->aui32ArraySizes[0]; - bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); - needsLooping = 1; - i = origArraySize - 1; - } - else - bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - psContext->currentGLSLString = &psPhase->earlyMain; - psContext->indent++; - - // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. 
- do - { - int comp = 0; - psContext->AddIndentation(); - if (needsLooping) - bformata(psPhase->earlyMain, "phase%d_Input%d_%d[%d] = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i); - else - bformata(psPhase->earlyMain, "phase%d_Input%d_%d = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - int hasSig = 0; - if (regSpace == 0) - hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - else - hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - if (hasSig) - { - numComps = GetNumberBitsSet(psSig->ui32Mask); - if (psSig->eComponentType == INOUT_COMPONENT_SINT32) - { - bformata(psPhase->earlyMain, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "intBitsToFloat(" : "float("); - hasCast = 1; - } - else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) - { - bformata(psPhase->earlyMain, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"uintBitsToFloat(" : "float("); - hasCast = 1; - } - - // Override the array size of the operand so TranslateOperand call below prints the correct index - if (needsLooping) - psOperand->aui32ArraySizes[0] = i; - - // And the component mask - psOperand->ui32CompMask = 1 << comp; - - TranslateOperand(psOperand, TO_FLAG_NAME_ONLY); - - // Restore the original array size value and mask - psOperand->ui32CompMask = origMask; - if (needsLooping) - psOperand->aui32ArraySizes[0] = origArraySize; - - if (hasCast) - bcatcstr(psPhase->earlyMain, ")"); - comp += numComps; - } - else // no signature found -> fill with zero - { - bcatcstr(psPhase->earlyMain, "0"); - comp++; - } - - if (comp < 4) - bcatcstr(psPhase->earlyMain, ", "); - } - bcatcstr(psPhase->earlyMain, ");\n"); - - } while ((--i) >= 0); - - psContext->currentGLSLString = &psContext->glsl; - psContext->indent--; - - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + bstring BufNamebstr = bfromcstr(""); + // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. + if (!isUAV && !isVulkan) + ui32BindingPoint = psContext->psShader->aui32StructuredBufferBindingPoints[psContext->psShader->ui32CurrentStructuredBufferIndex++]; + + ResourceName(BufNamebstr, psContext, isUAV ? 
RGROUP_UAV : RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); + + char *btmp = bstr2cstr(BufNamebstr, '\0'); + std::string BufName = btmp; + bcstrfree(btmp); + bdestroy(BufNamebstr); + + // Declare the struct type for structured buffers + if (!isRaw) + { + const char* typeStr = "uint"; + if (isUAV && DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psOperand)) + typeStr = "int"; + bformata(glsl, " struct %s_type {\n\t%s[%d] value;\n};\n\n", BufName.c_str(), typeStr, stride / 4); + } + + if (isVulkan) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(BufName); + bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.first, binding.second); + } + else + { + bformata(glsl, "layout(std430, binding = %d) ", ui32BindingPoint); + } + + if (ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) + bcatcstr(glsl, "coherent "); + + if (!isUAV) + bcatcstr(glsl, "readonly "); + + bformata(glsl, "buffer %s {\n\t", BufName.c_str()); + + if (hasEmbeddedCounter) + bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); + + if (isRaw) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uint"); + else + bcatcstr(glsl, "int"); + } + else + bformata(glsl, "%s_type", BufName.c_str()); + + bformata(glsl, " %s_buf[];\n};\n", BufName.c_str()); } -void ToGLSL::TranslateDeclaration(const Declaration* psDecl) +void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, + const ConstantBuffer* psCBuf, const Operand* psOperand, + bstring glsl) { - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - switch(psDecl->eOpcode) - { - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_PS_SGV: - { - const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - switch(eSpecialName) - { - case NAME_POSITION: - { - AddBuiltinInput(psDecl, "gl_Position"); - break; - } - case NAME_RENDER_TARGET_ARRAY_INDEX: - { - 
AddBuiltinInput(psDecl, "gl_Layer"); - if (psShader->eShaderType == VERTEX_SHADER) - { - psContext->RequireExtension("GL_AMD_vertex_shader_layer"); - } - - break; - } - case NAME_CLIP_DISTANCE: - { - AddBuiltinInput(psDecl, "gl_ClipDistance"); - break; - } - case NAME_CULL_DISTANCE: - { - AddBuiltinInput(psDecl, "gl_CullDistance"); - break; - } - case NAME_VIEWPORT_ARRAY_INDEX: - { - AddBuiltinInput(psDecl, "gl_ViewportIndex"); - break; - } - case NAME_INSTANCE_ID: - { - AddBuiltinInput(psDecl, "gl_InstanceID"); - break; - } - case NAME_IS_FRONT_FACE: - { - /* - Cast to int used because - if(gl_FrontFacing != 0) failed to compiled on Intel HD 4000. - Suggests no implicit conversion for bool<->int. - */ + uint32_t i; + int useGlobalsStruct = 1; + bool skipUnused = false; + + if ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) && psCBuf->name[0] == '$') + useGlobalsStruct = 0; + + if ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") + skipUnused = true; + + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) == 0) + useGlobalsStruct = 0; + + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); + } + + /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + ASSERT(0); // Catch this to see what's going on + std::string bname = "wut"; + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(bname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + } + else + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(location = %d) ", ui32BindingPoint); + } + if (useGlobalsStruct) + { + bcatcstr(glsl, "uniform 
struct "); + TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + + bcatcstr(glsl, "_Type {\n"); + } + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + if (!useGlobalsStruct) + bcatcstr(glsl, "uniform "); + + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0); + } + + if (useGlobalsStruct) + { + bcatcstr(glsl, "} "); + + TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + + bcatcstr(glsl, ";\n"); + } +} - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - AddBuiltinInput(psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Hi Adreno. - else - AddBuiltinInput(psDecl, "(gl_FrontFacing ? int(1) : int(0))"); - break; - } - case NAME_SAMPLE_INDEX: - { - AddBuiltinInput(psDecl, "gl_SampleID"); - break; - } - case NAME_VERTEX_ID: - { - AddBuiltinInput(psDecl, "gl_VertexID"); - break; - } - case NAME_PRIMITIVE_ID: - { - if(psShader->eShaderType == GEOMETRY_SHADER) - AddBuiltinInput(psDecl, "gl_PrimitiveIDIn"); // LOL opengl. 
- else - AddBuiltinInput(psDecl, "gl_PrimitiveID"); - break; - } - default: - { - bformata(glsl, "in vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); - } - } - break; - } - - case OPCODE_DCL_OUTPUT_SIV: - { - switch(psDecl->asOperands[0].eSpecialName) - { - case NAME_POSITION: - { - AddBuiltinOutput(psDecl, 0, "gl_Position"); - break; - } - case NAME_RENDER_TARGET_ARRAY_INDEX: - { - AddBuiltinOutput(psDecl, 0, "gl_Layer"); - if (psShader->eShaderType == VERTEX_SHADER) - { - psContext->RequireExtension("GL_AMD_vertex_shader_layer"); - } +static const char* GetVulkanTextureType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureBuffer"; + case RETURN_TYPE_UINT: + return "utextureBuffer"; + default: + return "textureBuffer"; + } + break; + } - break; - } - case NAME_CLIP_DISTANCE: - { - AddBuiltinOutput(psDecl, 0, "gl_ClipDistance"); - break; - } - case NAME_CULL_DISTANCE: - { - AddBuiltinOutput(psDecl, 0, "gl_CullDistance"); - break; - } - case NAME_VIEWPORT_ARRAY_INDEX: - { - AddBuiltinOutput(psDecl, 0, "gl_ViewportIndex"); - break; - } - case NAME_VERTEX_ID: - { - ASSERT(0); //VertexID is not an output - break; - } - case NAME_PRIMITIVE_ID: - { - AddBuiltinOutput(psDecl, 0, "gl_PrimitiveID"); - break; - } - case NAME_INSTANCE_ID: - { - ASSERT(0); //InstanceID is not an output - break; - } - case NAME_IS_FRONT_FACE: - { - ASSERT(0); //FrontFacing is not an output - break; - } - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - { - 
if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 4, "gl_TessLevelOuter"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); - } - break; - } - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); - break; - } - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); - break; - } - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[3]"); - break; - } - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - { - if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 3, "gl_TessLevelOuter"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); - } - break; - } - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); - break; - } - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); - break; - } - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - { - if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 2, "gl_TessLevelOuter"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); - } - break; - } - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); - break; - } - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - { - if(psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 2, "gl_TessLevelInner"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[0]"); - } - break; - } - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[1]"); - break; - } - default: - { - // Sometimes DX compiler seems to declare patch 
constant outputs like this. Anyway, nothing for us to do. -// bformata(glsl, "out vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture1D"; + case RETURN_TYPE_UINT: + return "utexture1D"; + default: + return "texture1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2D"; + case RETURN_TYPE_UINT: + return "utexture2D"; + default: + return "texture2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMS"; + case RETURN_TYPE_UINT: + return "utexture2DMS"; + default: + return "texture2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture3D"; + case RETURN_TYPE_UINT: + return "utexture3D"; + default: + return "texture3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureCube"; + case RETURN_TYPE_UINT: + return "utextureCube"; + default: + return "textureCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture1DArray"; + case RETURN_TYPE_UINT: + return "utexture1DArray"; + default: + return "texture1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DArray"; + case RETURN_TYPE_UINT: + return "utexture2DArray"; + default: + return "texture2DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMSArray"; + case RETURN_TYPE_UINT: + return "utexture2DMSArray"; + default: + return "texture2DMSArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + 
return "itextureCubeArray"; + case RETURN_TYPE_UINT: + return "utextureCubeArray"; + default: + return "textureCubeArray"; + } + break; + } + default: + ASSERT(0); + break; + } + + return "texture2D"; +} + +// Not static because this is used in toGLSLInstruction.cpp when sampling Vulkan textures +const char* GetSamplerType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + psContext->RequireExtension("GL_EXT_texture_buffer"); + switch (eType) + { + case RETURN_TYPE_SINT: + return "isamplerBuffer"; + case RETURN_TYPE_UINT: + return "usamplerBuffer"; + default: + return "samplerBuffer"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler1D"; + case RETURN_TYPE_UINT: + return "usampler1D"; + default: + return "sampler1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2D"; + case RETURN_TYPE_UINT: + return "usampler2D"; + default: + return "sampler2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2DMS"; + case RETURN_TYPE_UINT: + return "usampler2DMS"; + default: + return "sampler2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler3D"; + case RETURN_TYPE_UINT: + return "usampler3D"; + default: + return "sampler3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch 
(eType) + { + case RETURN_TYPE_SINT: + return "isamplerCube"; + case RETURN_TYPE_UINT: + return "usamplerCube"; + default: + return "samplerCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler1DArray"; + case RETURN_TYPE_UINT: + return "usampler1DArray"; + default: + return "sampler1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2DArray"; + case RETURN_TYPE_UINT: + return "usampler2DArray"; + default: + return "sampler2DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2DMSArray"; + case RETURN_TYPE_UINT: + return "usampler2DMSArray"; + default: + return "sampler2DMSArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isamplerCubeArray"; + case RETURN_TYPE_UINT: + return "usamplerCubeArray"; + default: + return "samplerCubeArray"; + } + break; + } + default: + ASSERT(0); + break; + } + + return "sampler2D"; +} + +static const char *GetSamplerPrecision(const HLSLCrossCompilerContext *psContext, REFLECT_RESOURCE_PRECISION ePrec) +{ + if (!HavePrecisionQualifiers(psContext)) + return " "; + + switch (ePrec) + { + default: + case REFLECT_RESOURCE_PRECISION_UNKNOWN: + case REFLECT_RESOURCE_PRECISION_LOWP: + return EmitLowp(psContext) ? 
"lowp " : "mediump "; + case REFLECT_RESOURCE_PRECISION_HIGHP: + return "highp "; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return "mediump "; + } +} + +static void TranslateVulkanResource(HLSLCrossCompilerContext* psContext, const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + const ResourceBinding *psBinding = NULL; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + ASSERT(psBinding != NULL); + + const char *samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + + const char* samplerTypeName = GetVulkanTextureType(psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); + + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.first, binding.second); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); +} + +static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, uint32_t samplerCanDoShadowCmp) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + const char *samplerPrecision = NULL; + std::set::iterator i; + + const char* samplerTypeName = GetSamplerType(psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); + + if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_TEXTURECUBEARRAY + && !HaveCubemapArray(psContext->psShader->eTargetLanguage)) + { + // Need to enable extension (either OES or ARB), but we only need to add it once + if 
(IsESLanguage(psContext->psShader->eTargetLanguage)) + { + psContext->EnableExtension("GL_OES_texture_cube_map_array"); + psContext->EnableExtension("GL_EXT_texture_cube_map_array"); + } + else + psContext->RequireExtension("GL_ARB_texture_cube_map_array"); + } + + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + psContext->EnableExtension("GL_EXT_shadow_samplers"); + } + + const ResourceBinding *psBinding = NULL; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + ASSERT(psBinding != NULL); + + samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + if (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) + { + std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 1); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + } + for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) + { + std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 0); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + } + + if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + //Create shadow and non-shadow sampler. + //HLSL does not have separate types for depth compare, just different functions. 
+ std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); + + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); +} + +void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precision) +{ + Operand *psOperand = (Operand *)&psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0) + { + if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + needsRedirect = 1; + } + else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? 
+ int needsLooping = 0; + int i = 0; + uint32_t origArraySize = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) + if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) + { + // The count is actually stored in psOperand->aui32ArraySizes[0] + origArraySize = psOperand->aui32ArraySizes[0]; + bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + needsLooping = 1; + i = origArraySize - 1; + } + else + bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + psContext->indent++; + + // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. + do + { + int comp = 0; + bstring str = GetEarlyMain(psContext); + if (needsLooping) + bformata(str, "phase%d_Input%d_%d[%d] = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i); + else + bformata(str, "phase%d_Input%d_%d = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + int hasSig = 0; + if (regSpace == 0) + hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + else + hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + if (hasSig) + { + numComps = GetNumberBitsSet(psSig->ui32Mask); + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"intBitsToFloat(" : "float("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "uintBitsToFloat(" : "float("); + hasCast = 1; + } + + // Override the array size of the operand so TranslateOperand call below prints the correct index + if (needsLooping) + psOperand->aui32ArraySizes[0] = i; + + // And the component mask + psOperand->ui32CompMask = 1 << comp; + + TranslateOperand(str, psOperand, TO_FLAG_NAME_ONLY); + + // Restore the original array size value and mask + psOperand->ui32CompMask = origMask; + if (needsLooping) + psOperand->aui32ArraySizes[0] = origArraySize; + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + } + else // no signature found -> fill with zero + { + bcatcstr(str, "0"); + comp++; + } + + if (comp < 4) + bcatcstr(str, ", "); + } + bcatcstr(str, ");\n"); + } + while ((--i) >= 0); + + psContext->indent--; + + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void ToGLSL::TranslateDeclaration(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + switch (psDecl->eOpcode) + { + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + { + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + switch (eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinInput(psDecl, "gl_Position"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinInput(psDecl, "gl_Layer"); + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } + + break; + } + case NAME_CLIP_DISTANCE: + { + AddBuiltinInput(psDecl, "gl_ClipDistance"); + break; + } + case NAME_CULL_DISTANCE: + { + 
AddBuiltinInput(psDecl, "gl_CullDistance"); + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + AddBuiltinInput(psDecl, "gl_ViewportIndex"); + break; + } + case NAME_INSTANCE_ID: + { + AddBuiltinInput(psDecl, "gl_InstanceID"); + break; + } + case NAME_IS_FRONT_FACE: + { + /* + Cast to int used because + if(gl_FrontFacing != 0) failed to compiled on Intel HD 4000. + Suggests no implicit conversion for bool<->int. + */ + + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + AddBuiltinInput(psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Old ES3.0 Adrenos treat 0u as const int + else + AddBuiltinInput(psDecl, "(gl_FrontFacing ? 1 : 0)"); + break; + } + case NAME_SAMPLE_INDEX: + { + // Using gl_SampleID requires either GL_OES_sample_variables or #version 320 es + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + psContext->RequireExtension("GL_OES_sample_variables"); + AddBuiltinInput(psDecl, "gl_SampleID"); + break; + } + case NAME_VERTEX_ID: + { + AddBuiltinInput(psDecl, "gl_VertexID"); + break; + } + case NAME_PRIMITIVE_ID: + { + if (psShader->eShaderType == GEOMETRY_SHADER) + AddBuiltinInput(psDecl, "gl_PrimitiveIDIn"); // LOL opengl. 
+ else + AddBuiltinInput(psDecl, "gl_PrimitiveID"); + break; + } + default: + { + bformata(glsl, "in vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + } + } + break; + } + + case OPCODE_DCL_OUTPUT_SIV: + { + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinOutput(psDecl, 0, "gl_Position"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinOutput(psDecl, 0, "gl_Layer"); + if (psShader->eShaderType == VERTEX_SHADER) + { + if (psContext->IsVulkan()) + psContext->RequireExtension("GL_ARB_shader_viewport_layer_array"); + else + psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } + + break; + } + case NAME_CLIP_DISTANCE: + { + AddBuiltinOutput(psDecl, 0, "gl_ClipDistance"); + break; + } + case NAME_CULL_DISTANCE: + { + AddBuiltinOutput(psDecl, 0, "gl_CullDistance"); + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + AddBuiltinOutput(psDecl, 0, "gl_ViewportIndex"); + break; + } + case NAME_VERTEX_ID: + { + ASSERT(0); //VertexID is not an output + break; + } + case NAME_PRIMITIVE_ID: + { + AddBuiltinOutput(psDecl, 0, "gl_PrimitiveID"); + break; + } + case NAME_INSTANCE_ID: + { + ASSERT(0); //InstanceID is not an output + break; + } + case NAME_IS_FRONT_FACE: + { + ASSERT(0); //FrontFacing is not an output + break; + } + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 4, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); + break; + } + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[3]"); + break; + } + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + { + 
if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 3, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); + break; + } + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 2, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 2, "gl_TessLevelInner"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[0]"); + } + break; + } + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[1]"); + break; + } + default: + { + // Sometimes DX compiler seems to declare patch constant outputs like this. Anyway, nothing for us to do. 
+// bformata(glsl, "out vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + +/* bcatcstr(glsl, "#define "); + TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, " %s\n", psDecl->asOperands[0].pszSpecialName); + break;*/ + } + } + break; + } + case OPCODE_DCL_INPUT: + { + const Operand* psOperand = &psDecl->asOperands[0]; + + int iNumComponents = psOperand->GetNumInputElements(psContext); + const char* StorageQualifier = "attribute"; + std::string inputName; + const char* Precision = ""; + + if ((psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) || + (psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) || + (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) + { + break; + } + + // No need to declare patch constants read again by the hull shader. + if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + // ...or control points + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + + // Also skip position input to domain shader + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + const ShaderInfo::InOutSignature *psIn = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + ASSERT(psIn != NULL); + + if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "POS") && psIn->ui32SemanticIndex == 0) + break; + } + + //Already declared as part of an array. 
+ if (psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + break; + } + + inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) + StorageQualifier = "patch in"; + else + StorageQualifier = "in"; + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump"; + break; + } + } + } + + const char * Interpolation = ""; + + if (psShader->eShaderType == GEOMETRY_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature, true); + + if ((psSignature != NULL) && (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32)) // GLSL spec requires that integer inputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input + { + Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), 
psContext->psShader->eTargetLanguage); + } + } + + DeclareInput(psContext, psDecl, + Interpolation, StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); + + HandleInputRedirect(psDecl, Precision); + break; + } + case OPCODE_DCL_INPUT_PS_SIV: + { + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinInput(psDecl, "gl_FragCoord"); + bcatcstr(GetEarlyMain(psContext), "vec4 hlslcc_FragCoord = vec4(gl_FragCoord.xyz, 1.0/gl_FragCoord.w);\n"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinInput(psDecl, "gl_Layer"); + break; + } + default: + ASSERT(0); + break; + } + break; + } + case OPCODE_DCL_INPUT_SIV: + { + if (psShader->eShaderType == PIXEL_SHADER && psContext->psDependencies) + { + psContext->psDependencies->SetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber, psDecl->value.eInterpolation); + } + break; + } + case OPCODE_DCL_INPUT_PS: + { + const Operand* psOperand = &psDecl->asOperands[0]; + int iNumComponents = psOperand->GetNumInputElements(psContext); + const char* StorageQualifier = "varying"; + const char* Precision = ""; + std::string inputName; + const char* Interpolation = ""; + int hasNoPerspective = psContext->psShader->eTargetLanguage <= LANG_ES_310 ? 
0 : 1; + inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + StorageQualifier = "in"; + } + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer inputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else + { + switch (psDecl->value.eInterpolation) + { + case INTERPOLATION_CONSTANT: + { + Interpolation = "flat "; + break; + } + case INTERPOLATION_LINEAR: + { + break; + } + case INTERPOLATION_LINEAR_CENTROID: + { + Interpolation = "centroid "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + Interpolation = hasNoPerspective ? "noperspective " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid"; + break; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + Interpolation = hasNoPerspective ? "sample " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + Interpolation = hasNoPerspective ? "noperspective sample " : ""; + break; + } + default: + ASSERT(0); + break; + } + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? 
"lowp " : "mediump "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump"; + break; + } + } + } + + bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && + psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); + + // If this is a SV_Target input and framebuffer fetch is enabled, do special input declaration unless output is declared later + if (haveFramebufferFetch && psOperand->iPSInOut && inputName.size() == 13 && !strncmp(inputName.c_str(), "vs_SV_Target", 12)) + { + bstring type = NULL; + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + if (iNumComponents > 1) + type = bformat("uvec%d", iNumComponents); + else + type = bformat("uint"); + break; + } + case INOUT_COMPONENT_SINT32: + { + if (iNumComponents > 1) + type = bformat("ivec%d", iNumComponents); + else + type = bformat("int"); + break; + } + case INOUT_COMPONENT_FLOAT32: + { + if (iNumComponents > 1) + type = bformat("vec%d", iNumComponents); + else + type = bformat("float"); + break; + } + default: + ASSERT(0); + break; + } + + uint32_t renderTarget = psSignature->ui32SemanticIndex; + + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (WriteToFragData(psContext->psShader->eTargetLanguage)) + { + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#endif\n"); + } + else + bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + } + else + { + if 
(!RenderTargetDeclared(renderTarget)) + { + bstring layoutQualifier = bformat(""); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + uint32_t index = 0; + + if ((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + { + if (renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); + } + else + { + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d) ", renderTarget); + } + } + + auto lq = bstr2cstr(layoutQualifier, '\0'); + + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s %s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); + } + else + bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); + + bcstrfree(lq); + bdestroy(layoutQualifier); + } + } + break; + } + + DeclareInput(psContext, psDecl, + Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, inputName.c_str(), psOperand->ui32CompMask); + + HandleInputRedirect(psDecl, Precision); + + break; + } + case OPCODE_DCL_TEMPS: + { + uint32_t i = 0; + const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; + bool usePrecision = (HavePrecisionQualifiers(psContext) != 0); + + for (i = 0; i < ui32NumTemps; i++) + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", 
HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); + if (psShader->psInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); + } + break; + } + case OPCODE_SPECIAL_DCL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: + { + const Operand* psOperand = &psDecl->asOperands[0]; + const uint32_t ui32BindingPoint = psOperand->aui32ArraySizes[0]; + + const 
ConstantBuffer* psCBuf = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); + + // We don't have a original resource name, maybe generate one??? + if (!psCBuf) + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(location = %d) ", ui32BindingPoint); + + bformata(glsl, "layout(std140) uniform ConstantBuffer%d {\n\tvec4 data[%d];\n} cb%d;\n", ui32BindingPoint, psOperand->aui32ArraySizes[1], ui32BindingPoint); + break; + } + + if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) + { + // Special case for vulkan subpass input. + + // The multisample versions have multiple members in the cbuffer, but we must only declare once. + // We still need to loop through all the variables and adjust names + + // Pick up the type and index + char ty = psCBuf->name[20]; + int idx = psCBuf->name[22] - '0'; + bool isMS = false; + std::pair binding = psContext->psDependencies->GetVulkanResourceBinding((std::string &)psCBuf->name, false, 2); + + bool declared = false; + for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) + { + ShaderVar &sv = (ShaderVar &)*itr; + if (sv.name.substr(0, 15) == "hlslcc_fbinput_") + { + if (!declared) + { + switch (ty) + { + case 'f': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + break; + case 'h': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + break; + case 'i': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + break; + case 'u': 
+ bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); + break; + case 'F': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + case 'H': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + case 'I': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + case 'U': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + default: + break; + } + declared = true; + } + else + { + if (ty == 'F' || ty == 'I' || ty == 'U') + isMS = true; + } + // Munge the name so it'll get the correct function call in GLSL directly + sv.name.insert(0, "subpassLoad("); + if (isMS) + sv.name.append(","); + else + sv.name.append(")"); + // Also update the type name + sv.sType.name = sv.name; + sv.sType.fullName = sv.name; + } + } + + // Break out so this doesn't get declared. 
+ break; + } + + if (psCBuf->name == "OVR_multiview") + { + // Special case for piggy-backing multiview info out + // This is not really a cbuffer, but if we see this being accessed, we know we need viewID + + // Extract numViews + uint32_t numViews = 0; + for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) + { + if (strncmp(itr->name.c_str(), "numViews_", 9) == 0) + { + // I really don't think we'll ever have more than 9 multiviews + numViews = itr->name[9] - '0'; + break; + } + } + if (numViews > 0 && numViews < 10) + { + // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position + psContext->RequireExtension("GL_OVR_multiview2"); + + if (psShader->eShaderType == VERTEX_SHADER) + bformata(glsl, "layout(num_views = %d) in;\n", numViews); + + break; // Break out so we don't actually declare this cbuffer + } + } + + if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) + { + if (psContext->flags & HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO && psCBuf->name[0] == '$') + { + DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); + } + else + { + DeclareUBOConstants(ui32BindingPoint, psCBuf, glsl); + } + } + else + { + DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); + } + break; + } + case OPCODE_DCL_RESOURCE: + { + psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; + + // Vulkan doesn't use combined textures+samplers, so do own handling in a separate func + if (psContext->IsVulkan()) + { + TranslateVulkanResource(psContext, psDecl); + break; + } + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + { + // Explicit layout bindings are not currently compatible with combined texture samplers. 
The layout below assumes there is exactly one GLSL sampler + // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. + if ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + //Constant buffer locations start at 0. Resource locations start at ui32NumConstantBuffers. + bformata(glsl, "layout(location = %d) ", + psContext->psShader->sInfo.psConstantBuffers.size() + psDecl->asOperands[0].ui32RegisterNumber); + } + } + + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + bcatcstr(glsl, "uniform "); + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "highp "); + bformata(glsl, "%s ", GetSamplerType(psContext, + RESOURCE_DIMENSION_BUFFER, + psDecl->asOperands[0].ui32RegisterNumber)); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case RESOURCE_DIMENSION_TEXTURE1D: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + default: + ASSERT(0); + break; + } + break; + } 
+ case OPCODE_DCL_OUTPUT: + { + bool needsDeclare = true; + if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE && psDecl->asOperands[0].ui32RegisterNumber == 0) + { + // Need extra check from signature: + const ShaderInfo::InOutSignature *sig = NULL; + psShader->sInfo.GetOutputSignatureFromRegister(0, psDecl->asOperands->GetAccessMask(), 0, &sig, true); + if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS") + { + needsDeclare = false; + AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); + } + } + + if (needsDeclare) + { + AddUserOutput(psDecl); + } + break; + } + case OPCODE_DCL_GLOBAL_FLAGS: + { + uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; -/* bcatcstr(glsl, "#define "); - TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); - bformata(glsl, " %s\n", psDecl->asOperands[0].pszSpecialName); - break;*/ - } - } - break; - } - case OPCODE_DCL_INPUT: - { - const Operand* psOperand = &psDecl->asOperands[0]; - - int iNumComponents = psOperand->GetNumInputElements(psContext); - const char* StorageQualifier = "attribute"; - std::string inputName; - const char* Precision = ""; - - if((psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT)|| - (psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID)|| - (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK)|| - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID)|| - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID)|| - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP)|| - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) || - (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) - { - break; - } - - // No need to declare patch constants read again by the hull shader. 
- if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - // ...or control points - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - - // Also skip position input to domain shader - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == DOMAIN_SHADER) - { - const ShaderInfo::InOutSignature *psIn = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); - ASSERT(psIn != NULL); - - if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "POS") && psIn->ui32SemanticIndex == 0) - break; - } - - //Already declared as part of an array. - if(psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) - { - break; - } - - inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); - - if(InOutSupported(psContext->psShader->eTargetLanguage)) - { - if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) - StorageQualifier = "patch in"; - else - StorageQualifier = "in"; - } - - if(HavePrecisionQualifiers(psContext)) - { - switch(psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? 
"lowp " : "mediump "; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump"; - break; - } - } - } - - const char * Interpolation = ""; - - if (psShader->eShaderType == GEOMETRY_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) - { - const ShaderInfo::InOutSignature* psSignature = NULL; - - psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature, true); - - if ((psSignature != NULL) && (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || - psSignature->eComponentType == INOUT_COMPONENT_SINT32)) // GLSL spec requires that integer inputs always have "flat" interpolation - { - Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); - } - else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input - { - Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); - } - } - - DeclareInput(psContext, psDecl, - Interpolation, StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); - - HandleInputRedirect(psDecl, Precision); - break; - } - case OPCODE_DCL_INPUT_PS_SIV: - { - switch(psDecl->asOperands[0].eSpecialName) - { - case NAME_POSITION: - { - AddBuiltinInput(psDecl, "gl_FragCoord"); - break; - } + if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) + { + bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); + psShader->sInfo.bEarlyFragmentTests = true; + } + if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) + { + //TODO add precise + //HLSL precise - 
http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx + } + if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) + { + psContext->EnableExtension("GL_ARB_gpu_shader_fp64"); + psShader->fp64 = 1; + } + break; + } + + case OPCODE_DCL_THREAD_GROUP: + { + bformata(glsl, "layout(local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n", + psDecl->value.aui32WorkGroupSize[0], + psDecl->value.aui32WorkGroupSize[1], + psDecl->value.aui32WorkGroupSize[2]); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; + // Invert triangle winding order to match glsl better, except on vulkan + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) == 0) + { + if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; + } + } + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + if (psContext->psShader->eShaderType == DOMAIN_SHADER) + { + switch (psDecl->value.eTessDomain) + { + case TESSELLATOR_DOMAIN_ISOLINE: + { + bcatcstr(glsl, "layout(isolines) in;\n"); + break; + } + case TESSELLATOR_DOMAIN_TRI: + { + bcatcstr(glsl, "layout(triangles) in;\n"); + break; + } + case TESSELLATOR_DOMAIN_QUAD: + { + bcatcstr(glsl, "layout(quads) in;\n"); + break; + } + default: + { + break; + } + } + } + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; + } + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + switch (psDecl->value.eOutputPrimitiveTopology) + { + case PRIMITIVE_TOPOLOGY_POINTLIST: + { + bcatcstr(glsl, 
"layout(points) out;\n"); + break; + } + case PRIMITIVE_TOPOLOGY_LINELIST_ADJ: + case PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ: + case PRIMITIVE_TOPOLOGY_LINELIST: + case PRIMITIVE_TOPOLOGY_LINESTRIP: + { + bcatcstr(glsl, "layout(line_strip) out;\n"); + break; + } + + case PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ: + case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ: + case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: + case PRIMITIVE_TOPOLOGY_TRIANGLELIST: + { + bcatcstr(glsl, "layout(triangle_strip) out;\n"); + break; + } default: - ASSERT(0); + { + break; + } + } + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + bformata(glsl, "layout(max_vertices = %d) out;\n", psDecl->value.ui32MaxOutputVertexCount); + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + switch (psDecl->value.eInputPrimitive) + { + case PRIMITIVE_POINT: + { + bcatcstr(glsl, "layout(points) in;\n"); + break; + } + case PRIMITIVE_LINE: + { + bcatcstr(glsl, "layout(lines) in;\n"); + break; + } + case PRIMITIVE_LINE_ADJ: + { + bcatcstr(glsl, "layout(lines_adjacency) in;\n"); + break; + } + case PRIMITIVE_TRIANGLE: + { + bcatcstr(glsl, "layout(triangles) in;\n"); + break; + } + case PRIMITIVE_TRIANGLE_ADJ: + { + bcatcstr(glsl, "layout(triangles_adjacency) in;\n"); + break; + } + default: + { break; + } + } + break; + } + case OPCODE_DCL_INTERFACE: + { + const uint32_t interfaceID = psDecl->value.iface.ui32InterfaceID; + const uint32_t numUniforms = psDecl->value.iface.ui32ArraySize; + const uint32_t ui32NumBodiesPerTable = psContext->psShader->funcPointer[interfaceID].ui32NumBodiesPerTable; + ShaderVar* psVar; + uint32_t varFound; - } - break; - } - case OPCODE_DCL_INPUT_SIV: - { - if(psShader->eShaderType == PIXEL_SHADER && psContext->psDependencies) - { - psContext->psDependencies->SetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber, psDecl->value.eInterpolation); - } - break; - } - case OPCODE_DCL_INPUT_PS: - { - const Operand* psOperand = &psDecl->asOperands[0]; - int iNumComponents = 
psOperand->GetNumInputElements(psContext); - const char* StorageQualifier = "varying"; - const char* Precision = ""; - std::string inputName; - const char* Interpolation = ""; - int hasNoPerspective = psContext->psShader->eTargetLanguage <= LANG_ES_310 ? 0 : 1; - inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); - - if (InOutSupported(psContext->psShader->eTargetLanguage)) - { - StorageQualifier = "in"; - } - const ShaderInfo::InOutSignature* psSignature = NULL; - - psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - - if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || - psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer inputs always have "flat" interpolation - { - Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); - } - else - { - switch (psDecl->value.eInterpolation) - { - case INTERPOLATION_CONSTANT: - { - Interpolation = "flat "; - break; - } - case INTERPOLATION_LINEAR: - { - break; - } - case INTERPOLATION_LINEAR_CENTROID: - { - Interpolation = "centroid "; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - { - Interpolation = hasNoPerspective ? "noperspective " : ""; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - { - Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid"; - break; - } - case INTERPOLATION_LINEAR_SAMPLE: - { - Interpolation = hasNoPerspective ? "sample " : ""; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - { - Interpolation = hasNoPerspective ? 
"noperspective sample " : ""; - break; - } - default: - ASSERT(0); - break; - } - } - - if(HavePrecisionQualifiers(psContext)) - { - switch(psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? "lowp " : "mediump "; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump"; - break; - } - } - } - - bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && - psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); - - // If this is a SV_Target input and framebuffer fetch is enabled, do special input declaration unless output is declared later - if (haveFramebufferFetch && psOperand->iPSInOut && inputName.size() == 13 && !strncmp(inputName.c_str(), "vs_SV_Target", 12)) - { - bstring type = NULL; - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - if (iNumComponents > 1) - type = bformat("uvec%d", iNumComponents); - else - type = bformat("uint"); - break; - } - case INOUT_COMPONENT_SINT32: - { - if (iNumComponents > 1) - type = bformat("ivec%d", iNumComponents); - else - type = bformat("int"); - break; - } - case INOUT_COMPONENT_FLOAT32: - { - if (iNumComponents > 1) - type = bformat("vec%d", iNumComponents); - else - type = bformat("float"); - break; - } - default: - ASSERT(0); - break; - } - - uint32_t renderTarget = psSignature->ui32SemanticIndex; - - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - if(WriteToFragData(psContext->psShader->eTargetLanguage)) - { - if (haveFramebufferFetch) - { - 
bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); - } - else - { - if (!RenderTargetDeclared(renderTarget)) - { - bstring layoutQualifier = bformat(""); - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) - { - uint32_t index = 0; - - if((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) - { - if(renderTarget > 0) - { - renderTarget = 0; - index = 1; - } - layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); - } - else - { - layoutQualifier = bformat("layout(location = %d) ", renderTarget); - } - } - - if (haveFramebufferFetch) - { - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "%sinout %s %s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "%sout %s %s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "%sout %s %s %s;\n", bstr2cstr(layoutQualifier, '\0'), Precision, type->data, OutputName); - } - } - break; - } - - DeclareInput(psContext, psDecl, - Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, inputName.c_str(), psOperand->ui32CompMask); - - HandleInputRedirect(psDecl, Precision); - - break; - } - case OPCODE_DCL_TEMPS: - { - uint32_t i = 0; - const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; - bool usePrecision = (HavePrecisionQualifiers(psContext) != 0); - - for (i = 0; i < ui32NumTemps; i++) - { 
- if (psShader->psFloatTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); - if (psShader->psFloat16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); - if (psShader->psFloat10TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); - if (psShader->psIntTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); - if (psShader->psInt16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); - if (psShader->psInt12TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); - if (psShader->psUIntTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); - if (psShader->psUInt16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); - if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); - if (psShader->psBoolTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); - 
} - break; - } - case OPCODE_SPECIAL_DCL_IMMCONST: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPCODE_DCL_CONSTANT_BUFFER: - { - const Operand* psOperand = &psDecl->asOperands[0]; - const uint32_t ui32BindingPoint = psOperand->aui32ArraySizes[0]; - - const ConstantBuffer* psCBuf = NULL; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); - - // We don't have a original resource name, maybe generate one??? - if(!psCBuf) - { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(location = %d) ",ui32BindingPoint); - - bformata(glsl, "layout(std140) uniform ConstantBuffer%d {\n\tvec4 data[%d];\n} cb%d;\n", ui32BindingPoint,psOperand->aui32ArraySizes[1],ui32BindingPoint); - break; - } - - if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) - { - // Special case for vulkan subpass input. - - // The multisample versions have multiple members in the cbuffer, but we must only declare once. 
- // We still need to loop through all the variables and adjust names - - // Pick up the type and index - char ty = psCBuf->name[20]; - int idx = psCBuf->name[22] - '0'; - bool isMS = false; - std::pair binding = psContext->psDependencies->GetVulkanResourceBinding((std::string &)psCBuf->name, false, 2); - - bool declared = false; - for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) - { - ShaderVar &sv = (ShaderVar &)*itr; - if (sv.name.substr(0, 15) == "hlslcc_fbinput_") - { - if (!declared) - { - switch (ty) - { - case 'f': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); - break; - case 'h': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); - break; - case 'i': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); - break; - case 'u': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInput %s;\n", idx, binding.first, binding.second, sv.name.c_str()); - break; - case 'F': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - case 'H': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - case 'I': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - case 'U': - bformata(glsl, 
"layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInputMS %s;\n", idx, binding.first, binding.second, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - default: - break; - } - declared = true; - } - else - { - if (ty == 'F' || ty == 'I' || ty == 'U') - isMS = true; - } - // Munge the name so it'll get the correct function call in GLSL directly - sv.name.insert(0, "subpassLoad("); - if (isMS) - sv.name.append(","); - else - sv.name.append(")"); - // Also update the type name - sv.sType.name = sv.name; - sv.sType.fullName = sv.name; - } - } - - // Break out so this doesn't get declared. - break; - } - - if(psCBuf->name == "OVR_multiview") - { - // Special case for piggy-backing multiview info out - // This is not really a cbuffer, but if we see this being accessed, we know we need viewID - - // Extract numViews - uint32_t numViews = 0; - for(std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) - { - if(strncmp(itr->name.c_str(), "numViews_", 9) == 0) - { - // I really don't think we'll ever have more than 9 multiviews - numViews = itr->name[9] - '0'; - break; - } - } - if(numViews > 0 && numViews < 10) - { - // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position - psContext->RequireExtension("GL_OVR_multiview2"); - - if(psShader->eShaderType == VERTEX_SHADER) - bformata(glsl, "layout(num_views = %d) in;\n", numViews); - - break; // Break out so we don't actually declare this cbuffer - - } - } - - if(psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) - { - if(psContext->flags & HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO && psCBuf->name[0] == '$') - { - DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); - } - else - { - DeclareUBOConstants(ui32BindingPoint, psCBuf, glsl); - } - } - else - { - DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); - } - break; - } - case OPCODE_DCL_RESOURCE: - { - 
psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; - - // Vulkan doesn't use combined textures+samplers, so do own handling in a separate func - if (psContext->IsVulkan()) - { - TranslateVulkanResource(psContext, psDecl); - break; - } - - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - { - // Explicit layout bindings are not currently compatible with combined texture samplers. The layout below assumes there is exactly one GLSL sampler - // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. - if((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) - { - //Constant buffer locations start at 0. Resource locations start at ui32NumConstantBuffers. - bformata(glsl, "layout(location = %d) ", - psContext->psShader->sInfo.psConstantBuffers.size() + psDecl->asOperands[0].ui32RegisterNumber); - } - } - - switch(psDecl->value.eResourceDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - bcatcstr(glsl, "uniform "); - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "highp "); - bformata(glsl, "%s ", GetSamplerType(psContext, - RESOURCE_DIMENSION_BUFFER, - psDecl->asOperands[0].ui32RegisterNumber)); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bcatcstr(glsl, ";\n"); - break; - } - case RESOURCE_DIMENSION_TEXTURE1D: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - TranslateResourceTexture(psContext, 
psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } + const char* uniformName; + + varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(interfaceID, &psVar); + ASSERT(varFound); + uniformName = &psVar->name[0]; + + bformata(glsl, "subroutine uniform SubroutineType %s[%d*%d];\n", uniformName, numUniforms, ui32NumBodiesPerTable); + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + //bformata(glsl, "void Func%d();//%d\n", psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].eType); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + break; + } + case OPCODE_CUSTOMDATA: + { + // On Vulkan we just spew the data in uints as-is + if (psContext->IsVulkan()) + { + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "const uvec4 ImmCB_%d[] = uvec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) + { + if (!isFirst) + { + bcatcstr(glsl, ",\n"); + } + isFirst = false; + bformata(glsl, "\tuvec4(0x%X, 0x%X, 0x%X, 0x%X)", data.a, data.b, data.c, data.d); + }); + bcatcstr(glsl, ");\n"); + } + else if (psContext->IsSwitch()) + { + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "const vec4 ImmCB_%d[] = vec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) + { + if (!isFirst) + { + bcatcstr(glsl, 
",\n"); + } + isFirst = false; + bformata(glsl, "vec4(uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)))", data.a, data.b, data.c, data.d); + }); + bcatcstr(glsl, ");\n"); + } + else + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. + ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; + std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) + { + bstring glsl = *psContext->currentGLSLString; + uint32_t componentCount = chunk.second.m_ComponentCount; + // Just do the declaration here and contents to earlyMain. + if (componentCount == 1) + bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + else + bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + + if (!HaveDynamicIndexing(psContext)) + { + bstring name = bfromcstr(""); + bformata(name, "ImmCB_%d_%d_%d", psContext->currentPhase, chunk.first, chunk.second.m_Rebase); + SHADER_VARIABLE_CLASS eClass = componentCount > 1 ? 
SVC_VECTOR : SVC_SCALAR; + + DeclareDynamicIndexWrapper((const char *)name->data, eClass, SVT_FLOAT, 1, componentCount, chunk.second.m_Size); + bdestroy(name); + } + + bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; + if (componentCount == 1) + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); + if (fpcheck(val[chunk.second.m_Rebase]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); + bcatcstr(tgt, ";\n"); + } + } + else + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); + for (uint32_t k = 0; k < componentCount; k++) + { + if (k != 0) + bcatcstr(tgt, ", "); + if (fpcheck(val[k]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); + } + bcatcstr(tgt, ");\n"); + } + } + }); + } + + + break; + } + 
case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + break; // Nothing to do + + case OPCODE_DCL_INDEXABLE_TEMP: + { + const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; + const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; + const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; + bformata(glsl, "vec%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + const ShaderInfo::InOutSignature* psSignature = NULL; + const char* type = "vec"; + const char* Precision = ""; + uint32_t startReg = 0; + uint32_t i; + bstring *oldString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; + + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + type = "uvec"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "ivec"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + ASSERT(0); + break; + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? 
+ { + default: + { + Precision = "highp "; + break; + } + case MIN_PRECISION_ANY_16: + case MIN_PRECISION_FLOAT_16: + case MIN_PRECISION_SINT_16: + case MIN_PRECISION_UINT_16: + { + Precision = "mediump "; + break; + } + case MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; + break; + } + } + } + + startReg = psDecl->asOperands[0].ui32RegisterNumber; + bformata(glsl, "%s%s4 phase%d_%sput%d_%d[%d];\n", Precision, type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); + oldString = psContext->currentGLSLString; + glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + psContext->currentGLSLString = &glsl; + if (isInput == 0) + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + int dummy = 0; + std::string realName; + uint32_t destMask = psDecl->asOperands[0].ui32CompMask; + uint32_t rebase = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + if (regSpace == 0) + if (isInput) + psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); + else + psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); + + ASSERT(psSig != NULL); + + if ((psSig->ui32Mask & destMask) == 0) + continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) + + while ((psSig->ui32Mask & (1 << rebase)) == 0) + rebase++; + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; + + if (isInput) + { + realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); + + psContext->AddIndentation(); + + bformata(glsl, 
"phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + bcatcstr(glsl, " = "); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + } + else + { + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); + + psContext->AddIndentation(); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + + bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + } + + bcatcstr(glsl, ";\n"); + } + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; + psContext->currentGLSLString = oldString; + glsl = *psContext->currentGLSLString; + + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + 
psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); + ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; + ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; + } + + + break; + } default: + // TODO Input index ranges. ASSERT(0); - break; + } + break; + } + case OPCODE_HS_DECLS: + { + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + bformata(glsl, "layout(vertices=%d) out;\n", psDecl->value.ui32MaxOutputVertexCount); + } + break; + } + case OPCODE_HS_FORK_PHASE: + { + break; + } + case OPCODE_HS_JOIN_PHASE: + { + break; + } + case OPCODE_DCL_SAMPLER: + { + if (psContext->IsVulkan()) + { + ResourceBinding *pRes = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, (const ResourceBinding **)&pRes); + ASSERT(pRes != NULL); + std::string name = ResourceName(psContext, RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, 0); + const char *samplerPrecision = GetSamplerPrecision(psContext, pRes ? pRes->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + const char *samplerType = psDecl->value.eSamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON ? 
"samplerShadow" : "sampler"; + bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.first, binding.second, samplerPrecision, samplerType, name.c_str()); + // Store the sampler mode to ShaderInfo, it's needed when we use the sampler + pRes->m_SamplerMode = psDecl->value.eSamplerMode; + } + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + //For GLSL the max tessellation factor is fixed to the value of gl_MaxTessGenLevel. + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + // non-float images need either 'i' or 'u' prefix. + char imageTypePrefix[2] = { 0, 0 }; + uint32_t bindpoint = psDecl->asOperands[0].ui32RegisterNumber; + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + + if (psDecl->sUAV.ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) + { + bcatcstr(glsl, "coherent "); + } - } - break; - } - case OPCODE_DCL_OUTPUT: - { - bool needsDeclare = true; - if(psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE && psDecl->asOperands[0].ui32RegisterNumber==0) - { - // Need extra check from signature: - const ShaderInfo::InOutSignature *sig = NULL; - psShader->sInfo.GetOutputSignatureFromRegister(0, psDecl->asOperands->GetAccessMask(), 0, &sig, true); - if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS") - { - needsDeclare = false; - AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); - } - } - - if(needsDeclare) - { - AddUserOutput(psDecl); - } - break; - } - case OPCODE_DCL_GLOBAL_FLAGS: - { - uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; - - if(ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) - { - bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); - psShader->sInfo.bEarlyFragmentTests = true; - } - if(!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) - { - //TODO add precise - //HLSL precise - 
http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx - } - if(ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) - { - psContext->EnableExtension("GL_ARB_gpu_shader_fp64"); - psShader->fp64 = 1; - } - break; - } - - case OPCODE_DCL_THREAD_GROUP: - { - bformata(glsl, "layout(local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n", - psDecl->value.aui32WorkGroupSize[0], - psDecl->value.aui32WorkGroupSize[1], - psDecl->value.aui32WorkGroupSize[2]); - break; - } - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - { - if(psContext->psShader->eShaderType == HULL_SHADER) - { - psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; - // Invert triangle winding order to match glsl better, except on vulkan - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) == 0) - { - if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; - else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; - } - } - break; - } - case OPCODE_DCL_TESS_DOMAIN: - { - if(psContext->psShader->eShaderType == DOMAIN_SHADER) - { - switch(psDecl->value.eTessDomain) - { - case TESSELLATOR_DOMAIN_ISOLINE: - { - bcatcstr(glsl, "layout(isolines) in;\n"); - break; - } - case TESSELLATOR_DOMAIN_TRI: - { - bcatcstr(glsl, "layout(triangles) in;\n"); - break; - } - case TESSELLATOR_DOMAIN_QUAD: - { - bcatcstr(glsl, "layout(quads) in;\n"); - break; - } - default: - { - break; - } - } - } - break; - } - case OPCODE_DCL_TESS_PARTITIONING: - { - if(psContext->psShader->eShaderType == HULL_SHADER) - { - psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; - } - break; - } - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - { - switch(psDecl->value.eOutputPrimitiveTopology) - { - case PRIMITIVE_TOPOLOGY_POINTLIST: - { - bcatcstr(glsl, 
"layout(points) out;\n"); - break; - } - case PRIMITIVE_TOPOLOGY_LINELIST_ADJ: - case PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ: - case PRIMITIVE_TOPOLOGY_LINELIST: - case PRIMITIVE_TOPOLOGY_LINESTRIP: - { - bcatcstr(glsl, "layout(line_strip) out;\n"); - break; - } - - case PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ: - case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ: - case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: - case PRIMITIVE_TOPOLOGY_TRIANGLELIST: - { - bcatcstr(glsl, "layout(triangle_strip) out;\n"); - break; - } - default: - { - break; - } - } - break; - } - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - { - bformata(glsl, "layout(max_vertices = %d) out;\n", psDecl->value.ui32MaxOutputVertexCount); - break; - } - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - { - switch(psDecl->value.eInputPrimitive) - { - case PRIMITIVE_POINT: - { - bcatcstr(glsl, "layout(points) in;\n"); - break; - } - case PRIMITIVE_LINE: - { - bcatcstr(glsl, "layout(lines) in;\n"); - break; - } - case PRIMITIVE_LINE_ADJ: - { - bcatcstr(glsl, "layout(lines_adjacency) in;\n"); - break; - } - case PRIMITIVE_TRIANGLE: - { - bcatcstr(glsl, "layout(triangles) in;\n"); - break; - } - case PRIMITIVE_TRIANGLE_ADJ: - { - bcatcstr(glsl, "layout(triangles_adjacency) in;\n"); - break; - } - default: - { - break; - } - } - break; - } - case OPCODE_DCL_INTERFACE: - { - const uint32_t interfaceID = psDecl->value.iface.ui32InterfaceID; - const uint32_t numUniforms = psDecl->value.iface.ui32ArraySize; - const uint32_t ui32NumBodiesPerTable = psContext->psShader->funcPointer[interfaceID].ui32NumBodiesPerTable; - ShaderVar* psVar; - uint32_t varFound; - - const char* uniformName; - - varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(interfaceID, &psVar); - ASSERT(varFound); - uniformName = &psVar->name[0]; - - bformata(glsl, "subroutine uniform SubroutineType %s[%d*%d];\n", uniformName, numUniforms, ui32NumBodiesPerTable); - break; - } - case OPCODE_DCL_FUNCTION_BODY: - { - //bformata(glsl, "void Func%d();//%d\n", 
psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].eType); - break; - } - case OPCODE_DCL_FUNCTION_TABLE: - { - break; - } - case OPCODE_CUSTOMDATA: - { - // On Vulkan we just spew the data in uints as-is - if (psContext->IsVulkan()) - { - bstring glsl = *psContext->currentGLSLString; - bformata(glsl, "const uvec4 ImmCB_%d[] = uvec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); - bool isFirst = true; - std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) - { - if (!isFirst) - { - bcatcstr(glsl, ",\n"); - } - isFirst = false; - bformata(glsl, "\tuvec4(0x%X, 0x%X, 0x%X, 0x%X)", data.a, data.b, data.c, data.d); - }); - bcatcstr(glsl, ");\n"); - } - else - { - // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. - // Walk through all the chunks we've seen in this phase. - ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; - std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) - { - bstring glsl = *psContext->currentGLSLString; - uint32_t componentCount = chunk.second.m_ComponentCount; - // Just do the declaration here and contents to earlyMain. - if (componentCount == 1) - bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - else - bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - - if (!HaveDynamicIndexing(psContext)) - { - bstring name = bfromcstr(""); - bformata(name, "ImmCB_%d_%d_%d", psContext->currentPhase, chunk.first, chunk.second.m_Rebase); - SHADER_VARIABLE_CLASS eClass = componentCount > 1 ? 
SVC_VECTOR : SVC_SCALAR; - - DeclareDynamicIndexWrapper((const char *)name->data, eClass, SVT_FLOAT, 1, componentCount, chunk.second.m_Size); - bdestroy(name); - } - - bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; - if (componentCount == 1) - { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) - { - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d - }; - bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); - if (fpcheck(val[chunk.second.m_Rebase]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); - else - HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); - bcatcstr(tgt, ";\n"); - } - } - else - { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) - { - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d - }; - bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); - for (uint32_t k = 0; k < componentCount; k++) - { - if (k != 0) - bcatcstr(tgt, ", "); - if (fpcheck(val[k]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); - else - HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); - } - bcatcstr(tgt, ");\n"); - } - } - - }); - } - - - - break; - } 
- case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - break; // Nothing to do - - case OPCODE_DCL_INDEXABLE_TEMP: - { - const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; - const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; - const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; - bformata(glsl, "vec%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); - break; - } - case OPCODE_DCL_INDEX_RANGE: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_INPUT: - { - const ShaderInfo::InOutSignature* psSignature = NULL; - const char* type = "vec"; - const char* Precision = ""; - uint32_t startReg = 0; - uint32_t i; - bstring *oldString; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; - - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - type = "uvec"; - break; - } - case INOUT_COMPONENT_SINT32: - { - type = "ivec"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) && + !(psContext->flags & HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS) && !isVulkan) + { //Special case on desktop glsl: writeonly image does not need format qualifier + bformata(glsl, "writeonly 
layout(binding=%d) ", bindpoint); + } + else + { + // Use 4 component format as a fallback if no instruction defines it + uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? psDecl->sUAV.ui32NumComponents : 4; + + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ)) + bcatcstr(glsl, "writeonly "); + else if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE)) + bcatcstr(glsl, "readonly "); + + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) && IsESLanguage(psShader->eTargetLanguage)) + { + // Need to require the extension + psContext->RequireExtension("GL_EXT_texture_buffer"); + } + + if (isVulkan) + { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + bformata(glsl, "layout(set = %d, binding = %d, ", binding.first, binding.second); + } + else + bformata(glsl, "layout(binding=%d, ", bindpoint); + + //TODO: catch bad format cases. e.g. es supports only limited format set. no rgb formats on glsl + if (numComponents >= 1) + bcatcstr(glsl, "r"); + if (numComponents >= 2) + bcatcstr(glsl, "g"); + if (numComponents >= 3) + bcatcstr(glsl, "ba"); + + switch (psDecl->sUAV.Type) + { + case RETURN_TYPE_FLOAT: + bcatcstr(glsl, "32f) highp "); //TODO: half case? + break; + case RETURN_TYPE_UNORM: + bcatcstr(glsl, "8) lowp "); + break; + case RETURN_TYPE_SNORM: + bcatcstr(glsl, "8_snorm) lowp "); + break; + case RETURN_TYPE_UINT: + bcatcstr(glsl, "32ui) highp "); //TODO: 16/8 cases? + break; + case RETURN_TYPE_SINT: + bcatcstr(glsl, "32i) highp "); //TODO: 16/8 cases? + break; default: ASSERT(0); - break; + } + } - } - - if (HavePrecisionQualifiers(psContext)) - { - switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? 
- { - default: - { - Precision = "highp "; - break; - } - case MIN_PRECISION_ANY_16: - case MIN_PRECISION_FLOAT_16: - case MIN_PRECISION_SINT_16: - case MIN_PRECISION_UINT_16: - { - Precision = "mediump "; - break; - } - case MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? "lowp " : "mediump "; - break; - } - } - } - - startReg = psDecl->asOperands[0].ui32RegisterNumber; - bformata(glsl, "%s%s4 phase%d_%sput%d_%d[%d];\n", Precision, type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); - oldString = psContext->currentGLSLString; - glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - psContext->currentGLSLString = &glsl; - if (isInput == 0) - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - int dummy = 0; - std::string realName; - uint32_t destMask = psDecl->asOperands[0].ui32CompMask; - uint32_t rebase = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - - if (regSpace == 0) - if (isInput) - psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); - else - psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); - - ASSERT(psSig != NULL); - - if ((psSig->ui32Mask & destMask) == 0) - continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) - - while ((psSig->ui32Mask & (1 << rebase)) == 0) - rebase++; - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; - - if (isInput) - { - realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); - - psContext->AddIndentation(); - - bformata(glsl, 
"phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - bcatcstr(glsl, " = "); - bcatcstr(glsl, realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - } - else - { - realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); - - psContext->AddIndentation(); - bcatcstr(glsl, realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - - bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - } - - bcatcstr(glsl, ";\n"); - } - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; - psContext->currentGLSLString = oldString; - glsl = *psContext->currentGLSLString; - - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - 
psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); - ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; - ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; - } - - - break; - } - default: - // TODO Input index ranges. - ASSERT(0); - } - break; - } - case OPCODE_HS_DECLS: - { - break; - } - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - { - break; - } - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - { - if(psContext->psShader->eShaderType == HULL_SHADER) - { - bformata(glsl, "layout(vertices=%d) out;\n", psDecl->value.ui32MaxOutputVertexCount); - } - break; - } - case OPCODE_HS_FORK_PHASE: - { - break; - } - case OPCODE_HS_JOIN_PHASE: - { - break; - } - case OPCODE_DCL_SAMPLER: - { - if (psContext->IsVulkan()) - { - ResourceBinding *pRes = NULL; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, (const ResourceBinding **)&pRes); - ASSERT(pRes != NULL); - std::string name = ResourceName(psContext, RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, 0); - const char *samplerPrecision = GetSamplerPrecision(psContext, pRes ? pRes->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); - const char *samplerType = psDecl->value.eSamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON ? 
"samplerShadow" : "sampler"; - bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.first, binding.second, samplerPrecision, samplerType, name.c_str()); - // Store the sampler mode to ShaderInfo, it's needed when we use the sampler - pRes->m_SamplerMode = psDecl->value.eSamplerMode; - } - break; - } - case OPCODE_DCL_HS_MAX_TESSFACTOR: - { - //For GLSL the max tessellation factor is fixed to the value of gl_MaxTessGenLevel. - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - { - // non-float images need either 'i' or 'u' prefix. - char imageTypePrefix[2] = { 0, 0 }; - uint32_t bindpoint = psDecl->asOperands[0].ui32RegisterNumber; - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - - if(psDecl->sUAV.ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) - { - bcatcstr(glsl, "coherent "); - } - - if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) && - !(psContext->flags & HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS) && !isVulkan) - { //Special case on desktop glsl: writeonly image does not need format qualifier - bformata(glsl, "writeonly layout(binding=%d) ", bindpoint); - } - else - { - // Use 4 component format as a fallback if no instruction defines it - uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? 
psDecl->sUAV.ui32NumComponents : 4; - - if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ)) - bcatcstr(glsl, "writeonly "); - else if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE)) - bcatcstr(glsl, "readonly "); - - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) && IsESLanguage(psShader->eTargetLanguage)) - { - // Need to require the extension - psContext->RequireExtension("GL_EXT_texture_buffer"); - } - - if(isVulkan) - { - std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); - bformata(glsl, "layout(set = %d, binding = %d, ", binding.first, binding.second); - } - else - bformata(glsl, "layout(binding=%d, ", bindpoint); - - //TODO: catch bad format cases. e.g. es supports only limited format set. no rgb formats on glsl - if (numComponents >= 1) - bcatcstr(glsl, "r"); - if (numComponents >= 2) - bcatcstr(glsl, "g"); - if (numComponents >= 3) - bcatcstr(glsl, "ba"); - - switch (psDecl->sUAV.Type) - { - case RETURN_TYPE_FLOAT: - bcatcstr(glsl, "32f) highp "); //TODO: half case? - break; - case RETURN_TYPE_UNORM: - bcatcstr(glsl, "8) lowp "); - break; - case RETURN_TYPE_SNORM: - bcatcstr(glsl, "8_snorm) lowp "); - break; - case RETURN_TYPE_UINT: - bcatcstr(glsl, "32ui) highp "); //TODO: 16/8 cases? - break; - case RETURN_TYPE_SINT: - bcatcstr(glsl, "32i) highp "); //TODO: 16/8 cases? 
- break; - default: - ASSERT(0); - } - } - - if (psDecl->sUAV.Type == RETURN_TYPE_UINT) - imageTypePrefix[0] = 'u'; - else if (psDecl->sUAV.Type == RETURN_TYPE_SINT) - imageTypePrefix[0] = 'i'; - - // GLSL requires images to be always explicitly defined as uniforms - switch(psDecl->value.eResourceDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - if(IsESLanguage(psShader->eTargetLanguage)) - psContext->RequireExtension("GL_EXT_texture_buffer"); - - bformata(glsl, "uniform %simageBuffer ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE1D: - { - bformata(glsl, "uniform %simage1D ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - bformata(glsl, "uniform %simage2D ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - bformata(glsl, "uniform %simage2DMS ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - bformata(glsl, "uniform %simage3D ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - bformata(glsl, "uniform %simageCube ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - bformata(glsl, "uniform %simage1DArray ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - bformata(glsl, "uniform %simage2DArray ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - bformata(glsl, "uniform %simage3DArray ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - bformata(glsl, "uniform %simageCubeArray ", imageTypePrefix); - break; - } + if (psDecl->sUAV.Type == RETURN_TYPE_UINT) + imageTypePrefix[0] = 'u'; + else if (psDecl->sUAV.Type == RETURN_TYPE_SINT) + imageTypePrefix[0] = 'i'; + + // GLSL requires images to be always explicitly defined as uniforms + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + if (IsESLanguage(psShader->eTargetLanguage)) + psContext->RequireExtension("GL_EXT_texture_buffer"); + + 
bformata(glsl, "uniform %simageBuffer ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE1D: + { + bformata(glsl, "uniform %simage1D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + bformata(glsl, "uniform %simage2D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + bformata(glsl, "uniform %simage2DMS ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + bformata(glsl, "uniform %simage3D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + bformata(glsl, "uniform %simageCube ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + bformata(glsl, "uniform %simage1DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + bformata(glsl, "uniform %simage2DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + bformata(glsl, "uniform %simage3DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + bformata(glsl, "uniform %simageCubeArray ", imageTypePrefix); + break; + } default: ASSERT(0); break; + } + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0; + if (psDecl->sUAV.bCounter) + { + if (isVulkan) + { + std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second + 1); + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; 
};\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead. + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl); + } + else + { + bcatcstr(glsl, "layout (binding = 0) uniform "); + + if (HavePrecisionQualifiers(psContext)) + bcatcstr(glsl, "highp "); + bcatcstr(glsl, "atomic_uint "); + ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + bcatcstr(glsl, "_counter; \n"); + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + } + else + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + if (psDecl->sUAV.bCounter) + { + if (isVulkan) + { + std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second + 1); + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), 
uavname.c_str()); + } + else + { + bcatcstr(glsl, "layout (binding = 0) uniform "); + if (HavePrecisionQualifiers(psContext)) + bcatcstr(glsl, "highp "); + bcatcstr(glsl, "atomic_uint "); + ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + bformata(glsl, "_counter; \n"); + } + } + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl); + + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + bcatcstr(glsl, "shared struct {\n"); + bformata(glsl, "\tuint value[%d];\n", psDecl->sTGSM.ui32Stride / 4); + bcatcstr(glsl, "} "); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, "[%d];\n", + psDecl->sTGSM.ui32Count); + psVarType->name = "value"; + + psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; + psVarType->Elements = psDecl->sTGSM.ui32Count; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + bcatcstr(glsl, "shared uint "); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, "[%d];\n", psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride); + + psVarType->name = "$Element"; + + psVarType->Columns = 1; + psVarType->Elements = 
psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; + break; + } + case OPCODE_DCL_STREAM: + { + ASSERT(psDecl->asOperands[0].eType == OPERAND_TYPE_STREAM); + + + if (psShader->eTargetLanguage >= LANG_400 && (psShader->ui32CurrentVertexOutputStream != psDecl->asOperands[0].ui32RegisterNumber)) + { + // Only emit stream declaration for desktop GL >= 4.0, and only if we're declaring something else than the default 0 + bformata(glsl, "layout(stream = %d) out;\n", psShader->ui32CurrentVertexOutputStream); + } + psShader->ui32CurrentVertexOutputStream = psDecl->asOperands[0].ui32RegisterNumber; - } - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bcatcstr(glsl, ";\n"); - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - { - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0; - if(psDecl->sUAV.bCounter) - { - if (isVulkan) - { - std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); - GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second+1); - bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); - - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); - } - else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead. 
- { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl); - } - else - { - bcatcstr(glsl, "layout (binding = 0) uniform "); - - if (HavePrecisionQualifiers(psContext)) - bcatcstr(glsl, "highp "); - bcatcstr(glsl, "atomic_uint "); - ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - bcatcstr(glsl, "_counter; \n"); - - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); - } - } - else - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); - } - - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - { - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - if(psDecl->sUAV.bCounter) - { - if (isVulkan) - { - std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); - GLSLCrossDependencyData::VulkanResourceBinding counterBinding = std::make_pair(uavBinding.first, uavBinding.second + 1); - bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.first, counterBinding.second, uavname.c_str(), uavname.c_str()); - } - else - { - bcatcstr(glsl, "layout (binding = 0) uniform "); - if (HavePrecisionQualifiers(psContext)) - bcatcstr(glsl, "highp "); - bcatcstr(glsl, "atomic_uint "); - ResourceName(glsl, psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - bformata(glsl, "_counter; \n"); - } - } - - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, 
&psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl); - - break; - } - case OPCODE_DCL_RESOURCE_STRUCTURED: - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl); - break; - } - case OPCODE_DCL_RESOURCE_RAW: - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl); - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - - bcatcstr(glsl, "shared struct {\n"); - bformata(glsl, "\tuint value[%d];\n", psDecl->sTGSM.ui32Stride/4); - bcatcstr(glsl, "} "); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bformata(glsl, "[%d];\n", - psDecl->sTGSM.ui32Count); - psVarType->name = "value"; - - psVarType->Columns = psDecl->sTGSM.ui32Stride/4; - psVarType->Elements = psDecl->sTGSM.ui32Count; - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - - bcatcstr(glsl, "shared uint "); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bformata(glsl, "[%d];\n", psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride); - - psVarType->name = "$Element"; - - psVarType->Columns = 1; - psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; - break; - } - case OPCODE_DCL_STREAM: - { - ASSERT(psDecl->asOperands[0].eType == OPERAND_TYPE_STREAM); - - - if (psShader->eTargetLanguage >= LANG_400 && (psShader->ui32CurrentVertexOutputStream != psDecl->asOperands[0].ui32RegisterNumber)) - { - // Only emit stream declaration for desktop GL >= 4.0, and only if we're declaring something else than the 
default 0 - bformata(glsl, "layout(stream = %d) out;\n", psShader->ui32CurrentVertexOutputStream); - } - psShader->ui32CurrentVertexOutputStream = psDecl->asOperands[0].ui32RegisterNumber; - - break; - } - case OPCODE_DCL_GS_INSTANCE_COUNT: - { - bformata(glsl, "layout(invocations = %d) in;\n", psDecl->value.ui32GSInstanceCount); - break; - } - default: - { - ASSERT(0); - break; - } - } + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + bformata(glsl, "layout(invocations = %d) in;\n", psDecl->value.ui32GSInstanceCount); + break; + } + default: + { + ASSERT(0); + break; + } + } } bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) { - ASSERT(sig != NULL); - if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 3); - std::ostringstream oss; - oss << "gl_TessLevelOuter[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - return true; - } - - if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 1); - std::ostringstream oss; - oss << "gl_TessLevelInner[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - return true; - } - - switch (sig->eSystemValueType) - { - case NAME_POSITION: - if (psContext->psShader->eShaderType == PIXEL_SHADER) - result = "gl_FragCoord"; - else - result = "gl_Position"; - return true; - case NAME_RENDER_TARGET_ARRAY_INDEX: - result = "gl_Layer"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_CLIP_DISTANCE: - case NAME_CULL_DISTANCE: - { - const char* glName = sig->eSystemValueType == NAME_CLIP_DISTANCE ? 
"Clip" : "Cull"; - // This is always routed through temp - std::ostringstream oss; - oss << "phase" << psContext->currentPhase << "_gl" << glName << "Distance" << sig->ui32SemanticIndex; - result = oss.str(); - return true; - } - case NAME_VIEWPORT_ARRAY_INDEX: - result = "gl_ViewportIndex"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_VERTEX_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - result = "gl_VertexIndex"; - else - result = "gl_VertexID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_INSTANCE_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - result = "gl_InstanceIndex"; - else - result = "gl_InstanceID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_IS_FRONT_FACE: - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; - else - result = "(gl_FrontFacing ? int(1) : int(0))"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_PRIMITIVE_ID: - if (isInput && psContext->psShader->eShaderType == GEOMETRY_SHADER) - result = "gl_PrimitiveIDIn"; // LOL opengl - else - result = "gl_PrimitiveID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_SAMPLE_INDEX: - result = "gl_SampleID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - if (isIndexed) - { - result = "gl_TessLevelOuter"; - return true; - } - else - { - result = "gl_TessLevelOuter[0]"; - return true; - } - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - result = "gl_TessLevelOuter[1]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case 
NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - result = "gl_TessLevelOuter[2]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - result = "gl_TessLevelOuter[3]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - if (isIndexed) - { - result = "gl_TessLevelInner"; - return true; - } - else - { - result = "gl_TessLevelInner[0]"; - return true; - } - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - result = "gl_TessLevelInner[3]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - default: - break; - } - - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) - { - if (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0) - { - result = "gl_out[gl_InvocationID].gl_Position"; - return true; - } - std::ostringstream oss; - if(isInput) - oss << psContext->inputPrefix << sig->semanticName << sig->ui32SemanticIndex; - else - oss << psContext->outputPrefix << sig->semanticName << sig->ui32SemanticIndex << "[gl_InvocationID]"; - result = oss.str(); - return true; - } - - // TODO: Add other builtins here. 
- if (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0 && psContext->psShader->eShaderType == VERTEX_SHADER)) - { - result = "gl_Position"; - return true; - } - - if (sig->semanticName == "PSIZE") - { - result = "gl_PointSize"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - } - - return false; + ASSERT(sig != NULL); + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "gl_TessLevelOuter[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + return true; + } + + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + std::ostringstream oss; + oss << "gl_TessLevelInner[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + return true; + } + + switch (sig->eSystemValueType) + { + case NAME_POSITION: + if (psContext->psShader->eShaderType == PIXEL_SHADER) + result = "hlslcc_FragCoord"; + else + result = "gl_Position"; + return true; + case NAME_RENDER_TARGET_ARRAY_INDEX: + result = "gl_Layer"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_CLIP_DISTANCE: + case NAME_CULL_DISTANCE: + { + const char* glName = sig->eSystemValueType == NAME_CLIP_DISTANCE ? 
"Clip" : "Cull"; + // This is always routed through temp + std::ostringstream oss; + oss << "phase" << psContext->currentPhase << "_gl" << glName << "Distance" << sig->ui32SemanticIndex; + result = oss.str(); + return true; + } + case NAME_VIEWPORT_ARRAY_INDEX: + result = "gl_ViewportIndex"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_VERTEX_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + result = "gl_VertexIndex"; + else + result = "gl_VertexID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_INSTANCE_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + result = "gl_InstanceIndex"; + else + result = "gl_InstanceID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_IS_FRONT_FACE: + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; // Old ES3.0 Adrenos treat 0u as const int + else + result = "(gl_FrontFacing ? 
1 : 0)"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_PRIMITIVE_ID: + if (isInput && psContext->psShader->eShaderType == GEOMETRY_SHADER) + result = "gl_PrimitiveIDIn"; // LOL opengl + else + result = "gl_PrimitiveID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_SAMPLE_INDEX: + result = "gl_SampleID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (isIndexed) + { + result = "gl_TessLevelOuter"; + return true; + } + else + { + result = "gl_TessLevelOuter[0]"; + return true; + } + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + result = "gl_TessLevelOuter[1]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + result = "gl_TessLevelOuter[2]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + result = "gl_TessLevelOuter[3]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (isIndexed) + { + result = "gl_TessLevelInner"; + return true; + } + else + { + result = "gl_TessLevelInner[0]"; + return true; + } + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + result = "gl_TessLevelInner[3]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + default: + break; + } + + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + if (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0) + { + result = 
"gl_out[gl_InvocationID].gl_Position"; + return true; + } + std::ostringstream oss; + if (isInput) + oss << psContext->inputPrefix << sig->semanticName << sig->ui32SemanticIndex; + else + oss << psContext->outputPrefix << sig->semanticName << sig->ui32SemanticIndex << "[gl_InvocationID]"; + result = oss.str(); + return true; + } + + // TODO: Add other builtins here. + if (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0 && psContext->psShader->eShaderType == VERTEX_SHADER)) + { + result = "gl_Position"; + return true; + } + + if (sig->semanticName == "PSIZE") + { + result = "gl_PointSize"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + } + + return false; } diff --git a/src/toGLSLInstruction.cpp b/src/toGLSLInstruction.cpp index 1c1dee2..a497491 100644 --- a/src/toGLSLInstruction.cpp +++ b/src/toGLSLInstruction.cpp @@ -16,8 +16,9 @@ using namespace HLSLcc; // In toGLSLDeclaration.cpp const char* GetSamplerType(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t ui32RegisterNumber); + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber); +bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, const Operand* psOperand); // This function prints out the destination name, possible destination writemask, assignment operator // and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) @@ -25,1367 +26,1349 @@ const char* GetSamplerType(HLSLCrossCompilerContext* psContext, // and pSrcCount will be filled with the number of components expected // ui32CompMask can be used to only write to 1 or more components (used by MOVC) void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) + 
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) { - uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); - bstring glsl = *psContext->currentGLSLString; - SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); - ASSERT(pNeedsParenthesis != NULL); - - *pNeedsParenthesis = 0; - - TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); - - // Simple path: types match. - if (DoAssignmentDataTypesMatch(eDestDataType, eSrcType)) - { - // Cover cases where the HLSL language expects the rest of the components to be default-filled - // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - *pNeedsParenthesis = 1; - } - else - bformata(glsl, " %s ", szAssignmentOp); - return; - } - - switch (eDestDataType) - { - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - // Bitcasts from lower precisions are ambiguous - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - { - bformata(glsl, " %s floatBitsToInt(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - - (*pNeedsParenthesis)++; - break; - case SVT_UINT: - case SVT_UINT16: - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 
&& HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - { - bformata(glsl, " %s floatBitsToUint(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - - (*pNeedsParenthesis)++; - break; - - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); - if (psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - { - if (eSrcType == SVT_INT) - bformata(glsl, " %s intBitsToFloat(", szAssignmentOp); - else - bformata(glsl, " %s uintBitsToFloat(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - - (*pNeedsParenthesis)++; - break; - default: - // TODO: Handle bools? - ASSERT(0); - break; - } - return; + uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); + ASSERT(pNeedsParenthesis != NULL); + + *pNeedsParenthesis = 0; + + TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + + // Simple path: types match. 
+ if (DoAssignmentDataTypesMatch(eDestDataType, eSrcType)) + { + // Cover cases where the HLSL language expects the rest of the components to be default-filled + // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + *pNeedsParenthesis = 1; + } + else + bformata(glsl, " %s ", szAssignmentOp); + return; + } + + switch (eDestDataType) + { + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + // Bitcasts from lower precisions are ambiguous + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, " %s floatBitsToInt(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + case SVT_UINT: + case SVT_UINT16: + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, " %s floatBitsToUint(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, 
eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); + if (psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + { + if (eSrcType == SVT_INT) + bformata(glsl, " %s intBitsToFloat(", szAssignmentOp); + else + bformata(glsl, " %s uintBitsToFloat(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + default: + // TODO: Handle bools? + ASSERT(0); + break; + } } void ToGLSL::AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) { - AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); } void ToGLSL::AddAssignPrologue(int numParenthesis, bool isEmbedded /* = false*/) { - bstring glsl = *psContext->currentGLSLString; - while (numParenthesis != 0) - { - bcatcstr(glsl, ")"); - numParenthesis--; - } - if(!isEmbedded) - bcatcstr(glsl, ";\n"); - + bstring glsl = *psContext->currentGLSLString; + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } + if (!isEmbedded) + bcatcstr(glsl, ";\n"); } - void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, - 
uint32_t typeFlag) + uint32_t typeFlag) { - // Multiple cases to consider here: - // For shader model <=3: all comparisons are floats - // otherwise: - // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. typeflag: TO_FLAG_NONE - // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER - // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER - // - // Additional complexity: if dest swizzle element count is 1, we can use normal comparison operators, otherwise glsl intrinsics. - - - bstring glsl = *psContext->currentGLSLString; - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); - int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; - - int floatResult = 0; - - ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); - if (s0ElemCount != s1ElemCount) - { - // Set the proper auto-expand flag is either argument is scalar - typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::max(s0ElemCount, s1ElemCount) - 2)); - } - - if (psContext->psShader->ui32MajorVersion < 4) - { - floatResult = 1; - } - - if (destElemCount > 1) - { - const char* glslOpcode[] = { - "equal", - "lessThan", - "greaterThanEqual", - "notEqual", - }; - - int needsParenthesis = 0; - psContext->AddIndentation(); - if (isBoolDest) - { - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); - - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, floatResult ? 
SVT_FLOAT : SVT_UINT, destElemCount, false)); - bcatcstr(glsl, "("); - } - bformata(glsl, "%s(", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[1], typeFlag); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], typeFlag); - bcatcstr(glsl, ")"); - TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); - if (!isBoolDest) - { - bcatcstr(glsl, ")"); - if (!floatResult) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, " * 0xFFFFFFFFu"); - else - bcatcstr(glsl, " * 0xFFFFFFFF"); - } - } - - AddAssignPrologue(needsParenthesis); - } - else - { - const char* glslOpcode[] = { - "==", - "<", - ">=", - "!=", - }; - - //Scalar compare - - const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; - - if (workaroundAdrenoBugs) - { - // Workarounds for bug cases 777617, 735299, 776827 - bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); - - int needsParenthesis = 0; - psContext->AddIndentation(); - if (isBoolDest) - { - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = !!("); - needsParenthesis += 1; - TranslateOperand(&psInst->asOperands[1], typeFlag); - bformata(glsl, "%s", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[2], typeFlag); - AddAssignPrologue(needsParenthesis); - } - else - { - bcatcstr(glsl, "{ bool cond = "); - TranslateOperand(&psInst->asOperands[1], typeFlag); - bformata(glsl, "%s", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[2], typeFlag); - bcatcstr(glsl, "; "); - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); - bcatcstr(glsl, "!!cond ? "); - if (floatResult) - bcatcstr(glsl, "1.0 : 0.0"); - else - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. 
- else - bcatcstr(glsl, "0xFFFFFFFF : int(0)"); - } - AddAssignPrologue(needsParenthesis, true); - bcatcstr(glsl, "; }\n"); - } - - bcatcstr(glsl, "#else\n"); - } - - int needsParenthesis = 0; - psContext->AddIndentation(); - if (isBoolDest) - { - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); - bcatcstr(glsl, "("); - } - TranslateOperand(&psInst->asOperands[1], typeFlag); - bformata(glsl, "%s", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[2], typeFlag); - if (!isBoolDest) - { - if (floatResult) - { - bcatcstr(glsl, ") ? 1.0 : 0.0"); - } - else - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, ") ? 0xFFFFFFFFu : uint(0u)"); // Adreno can't handle 0u. - else - bcatcstr(glsl, ") ? 0xFFFFFFFF : int(0)"); - } - } - AddAssignPrologue(needsParenthesis); - - if (workaroundAdrenoBugs) - bcatcstr(glsl, "#endif\n"); - } + // Multiple cases to consider here: + // For shader model <=3: all comparisons are floats + // otherwise: + // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. typeflag: TO_FLAG_NONE + // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER + // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER + // + // Additional complexity: if dest swizzle element count is 1, we can use normal comparison operators, otherwise glsl intrinsics. 
+ + + bstring glsl = *psContext->currentGLSLString; + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); + int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + + int floatResult = 0; + + ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); + if (s0ElemCount != s1ElemCount) + { + // Set the proper auto-expand flag is either argument is scalar + typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::max(s0ElemCount, s1ElemCount) - 2)); + } + + if (psContext->psShader->ui32MajorVersion < 4) + { + floatResult = 1; + } + + if (destElemCount > 1) + { + const char* glslOpcode[] = { + "equal", + "lessThan", + "greaterThanEqual", + "notEqual", + }; + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, floatResult ? 
SVT_FLOAT : SVT_UINT, destElemCount, false)); + bcatcstr(glsl, "("); + } + bformata(glsl, "%s(", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[1], typeFlag); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], typeFlag); + bcatcstr(glsl, ")"); + TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); + if (!isBoolDest) + { + bcatcstr(glsl, ")"); + if (!floatResult) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, " * 0xFFFFFFFFu"); + else + bcatcstr(glsl, " * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + } + } + + AddAssignPrologue(needsParenthesis); + } + else + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + + //Scalar compare + + const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; + + if (workaroundAdrenoBugs) + { + // Workarounds for bug cases 777617, 735299, 776827 + bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = !!("); + needsParenthesis += 1; + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + AddAssignPrologue(needsParenthesis); + } + else + { + bcatcstr(glsl, "{ bool cond = "); + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + bcatcstr(glsl, "; "); + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + bcatcstr(glsl, "!!cond ? "); + if (floatResult) + bcatcstr(glsl, "1.0 : 0.0"); + else + { + // Old ES3.0 Adrenos treat 0u as const int. 
+ // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? ") ? 0xFFFFFFFFu : uint(0)" : ") ? -1 : 0"); + } + AddAssignPrologue(needsParenthesis, true); + bcatcstr(glsl, "; }\n"); + } + + bcatcstr(glsl, "#else\n"); + } + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, &needsParenthesis); + bcatcstr(glsl, "("); + } + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + if (!isBoolDest) + { + if (floatResult) + bcatcstr(glsl, ") ? 1.0 : 0.0"); + else + { + // Old ES3.0 Adrenos treat 0u as const int. + // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? ") ? 0xFFFFFFFFu : uint(0)" : ") ? 
-1 : 0"); + } + } + AddAssignPrologue(needsParenthesis); + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#endif\n"); + } } - void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, bool isEmbedded /* = false*/) { - int numParenthesis = 0; - int srcSwizzleCount = pSrc->GetNumSwizzleElements(); - uint32_t writeMask = pDest->GetAccessMask(); + int numParenthesis = 0; + int srcSwizzleCount = pSrc->GetNumSwizzleElements(); + uint32_t writeMask = pDest->GetAccessMask(); - const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); - uint32_t flags = SVTTypeToFlag(eSrcType); + const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); + uint32_t flags = SVTTypeToFlag(eSrcType); - AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); - TranslateOperand(pSrc, flags, writeMask); + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); + TranslateOperand(pSrc, flags, writeMask); - AddAssignPrologue(numParenthesis, isEmbedded); + AddAssignPrologue(numParenthesis, isEmbedded); } void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2) { - bstring glsl = *psContext->currentGLSLString; - uint32_t destElemCount = pDest->GetNumSwizzleElements(); - uint32_t s0ElemCount = src0->GetNumSwizzleElements(); - uint32_t s1ElemCount = src1->GetNumSwizzleElements(); - uint32_t s2ElemCount = src2->GetNumSwizzleElements(); - uint32_t destWriteMask = pDest->GetAccessMask(); - uint32_t destElem; - - const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); - /* - for each component in dest[.mask] - if the corresponding component in src0 (POS-swizzle) - has any bit set - { - copy this component (POS-swizzle) from src1 into dest - } - else - { - copy this component (POS-swizzle) from src2 into dest - } - endfor - */ - - /* Single-component conditional variable (src0) */ - if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) - { - 
int numParenthesis = 0; - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - psContext->AddIndentation(); - AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); - bcatcstr(glsl, "("); - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); - else if (s0Type == SVT_BOOL) - TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); - else - TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, " != uint(0u)) ? "); // Adreno doesn't understand 0u. - else - bcatcstr(glsl, " != int(0)) ? "); - else if (s0Type == SVT_BOOL) - bcatcstr(glsl, ") ? "); - else - bcatcstr(glsl, " != 0) ? "); - } - - if (s1ElemCount == 1 && destElemCount > 1) - TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); - - bcatcstr(glsl, " : "); - if (s2ElemCount == 1 && destElemCount > 1) - TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); - - AddAssignPrologue(numParenthesis); - } - else - { - // TODO: We can actually do this in one op using mix(). - int srcElem = -1; - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - - // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations - // might alter the source before all components are handled. 
- const char* tempName = "hlslcc_movcTemp"; - bool dstIsSrc1 = (pDest->eType == src1->eType) && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); - bool dstIsSrc2 = (pDest->eType == src2->eType) && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); - - if (dstIsSrc1 || dstIsSrc2) - { - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); + bstring glsl = *psContext->currentGLSLString; + uint32_t destElemCount = pDest->GetNumSwizzleElements(); + uint32_t s0ElemCount = src0->GetNumSwizzleElements(); + uint32_t s1ElemCount = src1->GetNumSwizzleElements(); + uint32_t s2ElemCount = src2->GetNumSwizzleElements(); + uint32_t destWriteMask = pDest->GetAccessMask(); + uint32_t destElem; + + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + /* + for each component in dest[.mask] + if the corresponding component in src0 (POS-swizzle) + has any bit set + { + copy this component (POS-swizzle) from src1 into dest + } + else + { + copy this component (POS-swizzle) from src2 into dest + } + endfor + */ + + /* Single-component conditional variable (src0) */ + if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) + { + int numParenthesis = 0; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + psContext->AddIndentation(); + AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); + bcatcstr(glsl, "("); + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); + else if (s0Type == SVT_BOOL) + TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); + else + TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? " != uint(0)) ? 
" : " != 0) ? "); // Old ES3.0 Adrenos treat 0u as const int. + else if (s0Type == SVT_BOOL) + bcatcstr(glsl, ") ? "); + else + bcatcstr(glsl, " != 0) ? "); + } + + if (s1ElemCount == 1 && destElemCount > 1) + TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); + + bcatcstr(glsl, " : "); + if (s2ElemCount == 1 && destElemCount > 1) + TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); + + AddAssignPrologue(numParenthesis); + } + else + { + // TODO: We can actually do this in one op using mix(). + int srcElem = -1; + SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + + // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations + // might alter the source before all components are handled. + const std::string tempName = "hlslcc_movcTemp"; + bool dstIsSrc1 = (pDest->eType == src1->eType) + && (dstType == src1->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); + bool dstIsSrc2 = (pDest->eType == src2->eType) + && (dstType == src2->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); + + if (dstIsSrc1 || dstIsSrc2) + { + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? 
psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : pDest->iNumComponents; - bformata(glsl, "%s %s = ", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName); - TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ";\n"); - } - - for (destElem = 0; destElem < 4; ++destElem) - { - int numParenthesis = 0; - srcElem++; - if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) - continue; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); - bcatcstr(glsl, "("); - if (s0Type == SVT_BOOL) - { - TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); - bcatcstr(glsl, ") ? "); - } - else - { - TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - bcatcstr(glsl, " != 0) ? 
"); - } - } - - if (!dstIsSrc1) - TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); - else - { - bformata(glsl, "%s", tempName); - TranslateOperandSwizzleWithMask(glsl, psContext, src1, 1 << srcElem, 0); - } - - bcatcstr(glsl, " : "); - - if (!dstIsSrc2) - TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); - else - { - bformata(glsl, "%s", tempName); - TranslateOperandSwizzleWithMask(glsl, psContext, src2, 1 << srcElem, 0); - } - - AddAssignPrologue(numParenthesis); - } - - if (dstIsSrc1 || dstIsSrc2) - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - } + + const char* constructorStr = HLSLcc::GetConstructorForType(psContext, eDestType, numComponents, false); + bformata(glsl, "%s %s = ", constructorStr, tempName.c_str()); + TranslateOperand(pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, ";\n"); + + // Override OPERAND_TYPE_TEMP name temporarily + const_cast(pDest)->specialName.assign(tempName); + } + + for (destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); + bcatcstr(glsl, "("); + if (s0Type == SVT_BOOL) + { + TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, ") ? "); + } + else + { + TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + bcatcstr(glsl, " != 0) ? 
"); + } + } + + TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + bcatcstr(glsl, " : "); + TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + AddAssignPrologue(numParenthesis); + } + + if (dstIsSrc1 || dstIsSrc2) + { + const_cast(pDest)->specialName.clear(); + + psContext->AddIndentation(); + TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, " = %s;\n", tempName.c_str()); + + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + } } void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded /* = false*/) + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded /* = false*/) { - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t src0AccessMask = psInst->asOperands[src0].GetAccessMask(); - uint32_t src1AccessMask = psInst->asOperands[src1].GetAccessMask(); - uint32_t src0AccessCount = GetNumberBitsSet(src0AccessMask); - uint32_t src1AccessCount = GetNumberBitsSet(src1AccessMask); - int needsParenthesis = 0; - - if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) - { - const char *binaryOpWrap = NULL; - - if (!strcmp("%", name)) - binaryOpWrap = "op_modi"; - else if (!strcmp("&", name)) - binaryOpWrap = "op_and"; - else if (!strcmp("|", name)) - binaryOpWrap = "op_or"; - else if (!strcmp("^", name)) - binaryOpWrap = "op_xor"; - else if (!strcmp(">>", name)) - binaryOpWrap = "op_shr"; - else if (!strcmp("<<", name)) - binaryOpWrap = "op_shl"; - // op_not handled separately at OPCODE_NOT - - if (binaryOpWrap) - { - 
UseExtraFunctionDependency(binaryOpWrap); - CallHelper2Int(binaryOpWrap, psInst, 0, 1, 2, 1); - return; - } - } - - if (src1SwizCount != src0SwizCount) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - if(!isEmbedded) - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); - - // Horrible Adreno bug workaround: - // All pre-ES3.1 Adreno GLES3.0 drivers fail in cases like this: - // vec4 a.xyz = b.xyz + c.yzw; - // Attempt to detect this and fall back to component-wise binary op. - if ( (psContext->psShader->eTargetLanguage == LANG_ES_300) && - ((src0AccessCount > 1 && !(src0AccessMask & OPERAND_4_COMPONENT_MASK_X)) || (src1AccessCount > 1 && !(src1AccessMask & OPERAND_4_COMPONENT_MASK_X)))) - { - uint32_t i; - int firstPrinted = 0; - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - for (i = 0; i < 4; i++) - { - if (!(destMask & (1 << i))) - continue; - - if (firstPrinted != 0) - bcatcstr(glsl, ", "); - else - firstPrinted = 1; - - // Remove the auto expand flags - ui32Flags &= ~(TO_AUTO_EXPAND_TO_VEC2 | TO_AUTO_EXPAND_TO_VEC3 | TO_AUTO_EXPAND_TO_VEC4); - - TranslateOperand(&psInst->asOperands[src0], ui32Flags, 1 << i); - bformata(glsl, " %s ", name); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, 1 << i); - } - bcatcstr(glsl, ")"); - } - else - { - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", name); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - } - AddAssignPrologue(needsParenthesis, isEmbedded); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = 
psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int needsParenthesis = 0; + + if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) + { + const char *binaryOpWrap = NULL; + + if (!strcmp("%", name)) + binaryOpWrap = "op_modi"; + else if (!strcmp("&", name)) + binaryOpWrap = "op_and"; + else if (!strcmp("|", name)) + binaryOpWrap = "op_or"; + else if (!strcmp("^", name)) + binaryOpWrap = "op_xor"; + else if (!strcmp(">>", name)) + binaryOpWrap = "op_shr"; + else if (!strcmp("<<", name)) + binaryOpWrap = "op_shl"; + // op_not handled separately at OPCODE_NOT + + if (binaryOpWrap) + { + UseExtraFunctionDependency(binaryOpWrap); + CallHelper2Int(binaryOpWrap, psInst, 0, 1, 2, 1); + return; + } + } + + if (src1SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + if (!isEmbedded) + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); + + // Adreno 3xx fails on binary ops that operate on vectors + bool opComponentWiseOnAdreno = (!strcmp("&", name) || !strcmp("|", name) || !strcmp("^", name) || !strcmp(">>", name) || !strcmp("<<", name)); + if (psContext->psShader->eTargetLanguage == LANG_ES_300 && opComponentWiseOnAdreno) + { + uint32_t i; + int firstPrinted = 0; + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < 4; i++) + { + if (!(destMask & (1 << i))) + continue; + + if (firstPrinted != 0) + bcatcstr(glsl, ", "); + else + firstPrinted = 1; + + // Remove the auto expand flags + ui32Flags &= ~(TO_AUTO_EXPAND_TO_VEC2 | TO_AUTO_EXPAND_TO_VEC3 | TO_AUTO_EXPAND_TO_VEC4); + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, 1 << i); + bformata(glsl, " %s ", name); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, 1 
<< i); + } + bcatcstr(glsl, ")"); + } + else + { + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", name); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + } + + AddAssignPrologue(needsParenthesis, isEmbedded); } void ToGLSL::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType) + int dest, int src0, int src1, int src2, uint32_t dataType) { - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - uint32_t ui32Flags = dataType; - int numParenthesis = 0; - - if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); - - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", op1); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bformata(glsl, " %s ", op2); - TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = 
psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + uint32_t ui32Flags = dataType; + int numParenthesis = 0; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", op1); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bformata(glsl, " %s ", op2); + TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } void ToGLSL::CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } void ToGLSL::CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) + int dest, int src0, int src1, int paramsShouldFollowWriteMask) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; - int numParenthesis = 0; + int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; + int numParenthesis = 0; - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, &numParenthesis); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 
1 : dstSwizCount, &numParenthesis); - bformata(glsl, "%s(", name); - numParenthesis++; + bformata(glsl, "%s(", name); + numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + AddAssignPrologue(numParenthesis); } void ToGLSL::CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) + int dest, int src0, int src1, int paramsShouldFollowWriteMask) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } void ToGLSL::CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) + int dest, int src0, int src1, int paramsShouldFollowWriteMask) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } void ToGLSL::CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask) + int dest, int src0, int paramsShouldFollowWriteMask) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; - psContext->AddIndentation(); + psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, &numParenthesis); - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } //Result is an int. void ToGLSL::CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask) + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; - psContext->AddIndentation(); + psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } // Texel fetches etc need a dummy sampler (because glslang wants one, for Reasons(tm)). // Any non-shadow sampler will do, so try to get one from sampler registers. If the current shader doesn't have any, declare a dummy one. -std::string ToGLSL:: GetVulkanDummySamplerName() +std::string ToGLSL::GetVulkanDummySamplerName() { - std::string dummySmpName = "hlslcc_dummyPointClamp"; - if (!psContext->IsVulkan()) - return ""; - - const ResourceBinding *pSmpInfo = NULL; - int smpIdx = 0; - - while (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, smpIdx, &pSmpInfo) != 0) - { - if (pSmpInfo->m_SamplerMode != D3D10_SB_SAMPLER_MODE_COMPARISON) - return ResourceName(psContext, RGROUP_SAMPLER, smpIdx, 0); - - smpIdx++; - } - - if (!psContext->psShader->m_DummySamplerDeclared) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(dummySmpName); - bstring code = bfromcstr(""); - bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.first, binding.second, dummySmpName.c_str()); - DeclareExtraFunction(dummySmpName, code); - bdestroy(code); - psContext->psShader->m_DummySamplerDeclared = true; - } - return dummySmpName; + std::string dummySmpName = "hlslcc_dummyPointClamp"; + if (!psContext->IsVulkan()) + return ""; + + const 
ResourceBinding *pSmpInfo = NULL; + int smpIdx = 0; + + while (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, smpIdx, &pSmpInfo) != 0) + { + if (pSmpInfo->m_SamplerMode != D3D10_SB_SAMPLER_MODE_COMPARISON) + return ResourceName(psContext, RGROUP_SAMPLER, smpIdx, 0); + + smpIdx++; + } + + if (!psContext->psShader->m_DummySamplerDeclared) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(dummySmpName); + bstring code = bfromcstr(""); + bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.first, binding.second, dummySmpName.c_str()); + DeclareExtraFunction(dummySmpName, code); + bdestroy(code); + psContext->psShader->m_DummySamplerDeclared = true; + } + return dummySmpName; } void ToGLSL::TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) { - int numParenthesis = 0; - - std::string vulkanSamplerName = GetVulkanDummySamplerName(); - - std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); - const bool hasOffset = (psInst->bAddressOffset != 0); - - // On Vulkan wrap the tex name with the sampler constructor - if (psContext->IsVulkan()) - { - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); - std::ostringstream oss; - oss << smpType; - oss << "(" << texName << ", " << vulkanSamplerName << ")"; - texName = oss.str(); - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); - - if(hasOffset) - bcatcstr(glsl, "texelFetchOffset("); - else - bcatcstr(glsl, "texelFetch("); - - switch (psBinding->eDimension) - { 
- case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - // Buffers don't have LOD or offset - if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) - { - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); - if (hasOffset) - bformata(glsl, ", %d", psInst->iUAddrOffset); - } - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE3D) - bformata(glsl, ", ivec3(%d, %d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY) - bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY) - bformata(glsl, ", %d", psInst->iUAddrOffset); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2D) - bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); - 
bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - { - ASSERT(psInst->eOpcode == OPCODE_LD_MS); - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - ASSERT(psInst->eOpcode == OPCODE_LD_MS); - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - // Not possible in either HLSL or GLSL - ASSERT(0); - break; - } - } - - TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); + int numParenthesis = 0; + + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); + const bool hasOffset = (psInst->bAddressOffset != 0); + + // On Vulkan wrap the tex name with the sampler constructor + if (psContext->IsVulkan()) + { + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + + psContext->AddIndentation(); + 
AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, &numParenthesis); + + if (hasOffset) + bcatcstr(glsl, "texelFetchOffset("); + else + bcatcstr(glsl, "texelFetch("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + // Buffers don't have LOD or offset + if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) + { + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset) + bformata(glsl, ", %d", psInst->iUAddrOffset); + } + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE3D) + bformata(glsl, ", ivec3(%d, %d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY) + bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset && 
psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY) + bformata(glsl, ", %d", psInst->iUAddrOffset); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2D) + bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Not possible in either HLSL or GLSL + ASSERT(0); + break; + } + } + + TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); } //Makes sure the texture coordinate swizzle is appropriate for the texture type. //i.e. vecX for X-dimension texture. //Currently supports floating point coord only, so not used for texelFetch. 
void ToGLSL::TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand) + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand) { - uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - //Vec1 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - //Vec2 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - //Vec3 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - flags |= TO_AUTO_EXPAND_TO_VEC4; - break; - } - default: - { - ASSERT(0); - break; - } - } - - //FIXME detect when integer coords are needed. - TranslateOperand(psTexCoordOperand, flags, opMask); + uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + //Vec1 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + //Vec2 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + //Vec3 texcoord. Mask out the other components. 
+ opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + } + default: + { + ASSERT(0); + break; + } + } + + //FIXME detect when integer coords are needed. + TranslateOperand(psTexCoordOperand, flags, opMask); } void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) { - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - bool isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); - bool isMS = psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMS || psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY; - - std::string texName = ResourceName(psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); - - // On Vulkan wrap the tex name with the sampler constructor - if (psContext->IsVulkan() && !isUAV) - { - std::string vulkanSamplerName = GetVulkanDummySamplerName(); - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); - std::ostringstream oss; - oss << smpType; - oss << "(" << texName << ", " << vulkanSamplerName << ")"; - texName = oss.str(); - } - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); - - //[width, height, depth or array size, total-mip-count] - if (index < 3) - { - int dim = GetNumTextureDimensions(psInst->eResDim); - bcatcstr(glsl, "("); - if (dim < (index + 1)) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "uint(0u)" : "0.0"); - else - bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "int(0)" : "0.0"); - } - else - { - if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bformata(glsl, "uvec%d(", dim); - else - bformata(glsl, "ivec%d(", dim); - } - else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) - bformata(glsl, "vec%d(1.0) / vec%d(", dim, dim); - else - bformata(glsl, "vec%d(", dim); - - if (isUAV) - bcatcstr(glsl, "imageSize("); - else - bcatcstr(glsl, "textureSize("); - - bcatcstr(glsl, texName.c_str()); - - if (!isUAV && !isMS) - { - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - } - bcatcstr(glsl, "))"); - - switch (index) - { - case 0: - bcatcstr(glsl, ".x"); - break; - case 1: - bcatcstr(glsl, ".y"); - break; - case 2: - bcatcstr(glsl, ".z"); - break; - } - } - - bcatcstr(glsl, ")"); - } - else - { - ASSERT(!isUAV); - if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "uint("); - else - bcatcstr(glsl, "int("); - } - else - bcatcstr(glsl, "float("); - bcatcstr(glsl, "textureQueryLevels("); - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, "))"); - } - AddAssignPrologue(numParenthesis); + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + bool isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + bool isMS = 
psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMS || psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY; + + std::string texName = ResourceName(psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); + + // On Vulkan wrap the tex name with the sampler constructor + if (psContext->IsVulkan() && !isUAV) + { + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); + + //[width, height, depth or array size, total-mip-count] + if (index < 3) + { + int dim = GetNumTextureDimensions(psInst->eResDim); + bcatcstr(glsl, "("); + if (dim < (index + 1)) + { + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? (HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? "uint(0)" : "0") : "0.0"); // Old ES3.0 Adrenos treat 0u as const int. 
+ } + else + { + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bformata(glsl, "uvec%d(", dim); + else + bformata(glsl, "ivec%d(", dim); + } + else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) + bformata(glsl, "vec%d(1.0) / vec%d(", dim, dim); + else + bformata(glsl, "vec%d(", dim); + + if (isUAV) + bcatcstr(glsl, "imageSize("); + else + bcatcstr(glsl, "textureSize("); + + bcatcstr(glsl, texName.c_str()); + + if (!isUAV && !isMS) + { + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + } + bcatcstr(glsl, "))"); + + switch (index) + { + case 0: + bcatcstr(glsl, ".x"); + break; + case 1: + bcatcstr(glsl, ".y"); + break; + case 2: + bcatcstr(glsl, ".z"); + break; + } + } + + bcatcstr(glsl, ")"); + } + else + { + ASSERT(!isUAV); + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uint("); + else + bcatcstr(glsl, "int("); + } + else + bcatcstr(glsl, "float("); + bcatcstr(glsl, "textureQueryLevels("); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, "))"); + } + AddAssignPrologue(numParenthesis); } void ToGLSL::TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags) + uint32_t ui32Flags) { - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; - - Operand* psDest = &psInst->asOperands[0]; - Operand* psDestAddr = &psInst->asOperands[1]; - Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; - Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; - Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; - Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; - Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? 
&psInst->asOperands[4] : 0; - Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; - Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; - Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; - - const char* funcName = "texture"; - const char* offset = ""; - const char* depthCmpCoordType = ""; - const char* gradSwizzle = ""; - const char* ext = ""; - - uint32_t ui32NumOffsets = 0; - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; - const int iHaveOverloadedTexFuncs = HaveOverloadedTextureFuncs(psContext->psShader->eTargetLanguage); - const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 1 : 0; - - if (psInst->bAddressOffset) - { - offset = "Offset"; - } - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - depthCmpCoordType = "vec2"; - gradSwizzle = ".x"; - ui32NumOffsets = 1; - if (!iHaveOverloadedTexFuncs) - { - funcName = "texture1D"; - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - funcName = "shadow1D"; - } - } - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - depthCmpCoordType = "vec3"; - gradSwizzle = ".xy"; - ui32NumOffsets = 2; - if (!iHaveOverloadedTexFuncs) - { - funcName = "texture2D"; - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - funcName = "shadow2D"; - } - } - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - depthCmpCoordType = "vec4"; - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - if (!iHaveOverloadedTexFuncs) - { - funcName = "textureCube"; - } - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - depthCmpCoordType = "vec4"; - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - if (!iHaveOverloadedTexFuncs) - { - funcName = "texture3D"; - } - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - depthCmpCoordType = "vec3"; - gradSwizzle = ".x"; - ui32NumOffsets = 1; - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - 
depthCmpCoordType = "vec4"; - gradSwizzle = ".xy"; - ui32NumOffsets = 2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (ui32Flags & TEXSMP_FLAG_GATHER) - funcName = "textureGather"; - - uint32_t uniqueNameCounter = 0; - - // In GLSL, for every texture sampling func overload, except for cubemap arrays, the - // depth compare reference value is given as the last component of the texture coord vector. - // Cubemap array sampling as well as all the gather funcs have a separate parameter for it. - // HLSL always provides the reference as a separate param. - // - // Here we create a temp texcoord var with the reference value embedded - if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && - eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && - !(ui32Flags & TEXSMP_FLAG_GATHER)) - { - uniqueNameCounter = psContext->psShader->asPhases[psContext->currentPhase].m_NextTexCoordTemp++; - psContext->AddIndentation(); - // Create a temp variable for the coordinate as Adrenos hate nonstandard swizzles in the texcoords - bformata(glsl, "%s txVec%d = ", depthCmpCoordType, uniqueNameCounter); - bformata(glsl, "%s(", depthCmpCoordType); - TranslateTexCoord(eResDim, psDestAddr); - bcatcstr(glsl, ","); - // Last component is the reference - TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ");\n"); - } - - SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); - psContext->AddIndentation(); - AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); - - // GLSL doesn't have textureLod() for 2d shadow samplers, we'll have to use grad instead. In that case assume LOD 0. 
- const bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); - const bool needsLodWorkaroundES2 = (psContext->psShader->eTargetLanguage == LANG_ES_100 && psContext->psShader->eShaderType == PIXEL_SHADER && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)); - - if (needsLodWorkaround) - { - bformata(glsl, "%sGrad%s(", funcName, offset); - } - else - { - if (psContext->psShader->eTargetLanguage == LANG_ES_100 && - psContext->psShader->eShaderType == PIXEL_SHADER && - ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD | TEXSMP_FLAG_GRAD)) - ext = "EXT"; - - if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD) && !needsLodWorkaroundES2) - bformata(glsl, "%sLod%s%s(", funcName, ext, offset); - else if (ui32Flags & TEXSMP_FLAG_GRAD) - bformata(glsl, "%sGrad%s%s(", funcName, ext, offset); - else - bformata(glsl, "%s%s%s(", funcName, ext, offset); - } - - if (psContext->IsVulkan()) - { - // Build the sampler name here - std::string samplerType = GetSamplerType(psContext, eResDim, psSrcTex->ui32RegisterNumber); - const ResourceBinding *pSmpRes = NULL; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, &pSmpRes); - - if (pSmpRes->m_SamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON) - samplerType.append("Shadow"); - std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, 0); - std::string smpName = ResourceName(psContext, RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, 0); - bformata(glsl, "%s(%s, %s)", samplerType.c_str(), texName.c_str(), smpName.c_str()); - } - else - { - // Sampler name - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); - else - bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); - } - 
bcatcstr(glsl, ", "); - - // Texture coordinates, either from previously constructed temp - // or straight from the psDestAddr operand - if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && - eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && - !(ui32Flags & TEXSMP_FLAG_GATHER)) - bformata(glsl, "txVec%d", uniqueNameCounter); - else - TranslateTexCoord(eResDim, psDestAddr); - - // If depth compare reference was not embedded to texcoord - // then insert it here as a separate param - if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && - eResDim == RESOURCE_DIMENSION_TEXTURECUBEARRAY && - (ui32Flags & TEXSMP_FLAG_GATHER)) - { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); - } - - // Add LOD/grad parameters based on the flags - if (needsLodWorkaround) - { - bcatcstr(glsl, ", vec2(0.0, 0.0), vec2(0.0, 0.0)"); - } - else if (ui32Flags & TEXSMP_FLAG_LOD) - { - if (!needsLodWorkaroundES2) - { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); - if (psContext->psShader->ui32MajorVersion < 4) - { - bcatcstr(glsl, ".w"); - } - } - } - else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) - { - if (!needsLodWorkaroundES2) - bcatcstr(glsl, ", 0.0"); - } - else if (ui32Flags & TEXSMP_FLAG_GRAD) - { - bcatcstr(glsl, ", vec4("); - TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - bcatcstr(glsl, ", vec4("); - TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - } - - // Add offset param - if (psInst->bAddressOffset) - { - if (ui32NumOffsets == 1) - { - bformata(glsl, ", %d", - psInst->iUAddrOffset); - } - else - if (ui32NumOffsets == 2) - { - bformata(glsl, ", ivec2(%d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset); - } - else - if (ui32NumOffsets == 3) - { - bformata(glsl, ", ivec3(%d, %d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset, - psInst->iWAddrOffset); - } - } - // HLSL gather has a variant with separate 
offset operand - else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) - { - uint32_t mask = OPERAND_4_COMPONENT_MASK_X; - if (ui32NumOffsets > 1) - mask |= OPERAND_4_COMPONENT_MASK_Y; - if (ui32NumOffsets > 2) - mask |= OPERAND_4_COMPONENT_MASK_Z; - - bcatcstr(glsl, ","); - TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); - } - - // Add bias if present - if (ui32Flags & TEXSMP_FLAG_BIAS) - { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT); - } - - // Add texture gather component selection if needed - if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) - { - ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); - if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) - { - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) - { - bformata(glsl, ", %d", psSrcSamp->aui32Swizzle[0]); - } - else - { - // Comp selection not supported with dephth compare gather - } - } - } - - bcatcstr(glsl, ")"); - - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) - { - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. - psSrcTex->iWriteMaskEnabled = 1; - TranslateOperandSwizzleWithMask(psContext, psSrcTex, psDest->GetAccessMask(), 0); - } - AddAssignPrologue(numParenthesis); + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; + + Operand* psDest = &psInst->asOperands[0]; + Operand* psDestAddr = &psInst->asOperands[1]; + Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; + Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; + Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; + Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? 
&psInst->asOperands[4 + hasParamOffset] : 0; + Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; + Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; + + const char* funcName = "texture"; + const char* offset = ""; + const char* depthCmpCoordType = ""; + const char* gradSwizzle = ""; + const char* ext = ""; + + uint32_t ui32NumOffsets = 0; + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; + const int iHaveOverloadedTexFuncs = HaveOverloadedTextureFuncs(psContext->psShader->eTargetLanguage); + const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 1 : 0; + + if (psInst->bAddressOffset) + { + offset = "Offset"; + } + if (psContext->IsSwitch() && psInst->eOpcode == OPCODE_GATHER4_PO) + { + // it seems that other GLSLCore compilers accept textureGather(sampler2D sampler, vec2 texCoord, ivec2 texelOffset, int component) with the "texelOffset" parameter, + // however this is not in the GLSL spec, and Switch's GLSLc compiler requires to use the textureGatherOffset version of the function + offset = "Offset"; + } + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + depthCmpCoordType = "vec2"; + gradSwizzle = ".x"; + ui32NumOffsets = 1; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture1D"; + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + funcName = "shadow1D"; + } + } + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture2D"; + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + funcName = "shadow2D"; + } + } + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + depthCmpCoordType = "vec4"; + 
gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + if (!iHaveOverloadedTexFuncs) + { + funcName = "textureCube"; + } + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture3D"; + } + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (ui32Flags & TEXSMP_FLAG_GATHER) + funcName = "textureGather"; + + uint32_t uniqueNameCounter = 0; + + // In GLSL, for every texture sampling func overload, except for cubemap arrays, the + // depth compare reference value is given as the last component of the texture coord vector. + // Cubemap array sampling as well as all the gather funcs have a separate parameter for it. + // HLSL always provides the reference as a separate param. 
+ // + // Here we create a temp texcoord var with the reference value embedded + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + (eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && !(ui32Flags & TEXSMP_FLAG_GATHER))) + { + uniqueNameCounter = psContext->psShader->asPhases[psContext->currentPhase].m_NextTexCoordTemp++; + psContext->AddIndentation(); + // Create a temp variable for the coordinate as Adrenos hate nonstandard swizzles in the texcoords + bformata(glsl, "%s txVec%d = ", depthCmpCoordType, uniqueNameCounter); + bformata(glsl, "%s(", depthCmpCoordType); + TranslateTexCoord(eResDim, psDestAddr); + bcatcstr(glsl, ","); + // Last component is the reference + TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ");\n"); + } + + SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); + psContext->AddIndentation(); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); + + // GLSL doesn't have textureLod() for 2d shadow samplers, we'll have to use grad instead. In that case assume LOD 0. 
+ const bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + const bool needsLodWorkaroundES2 = (psContext->psShader->eTargetLanguage == LANG_ES_100 && psContext->psShader->eShaderType == PIXEL_SHADER && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)); + + if (needsLodWorkaround) + { + bformata(glsl, "%sGrad%s(", funcName, offset); + } + else + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && + psContext->psShader->eShaderType == PIXEL_SHADER && + ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD | TEXSMP_FLAG_GRAD)) + ext = "EXT"; + + if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD) && !needsLodWorkaroundES2) + bformata(glsl, "%sLod%s%s(", funcName, ext, offset); + else if (ui32Flags & TEXSMP_FLAG_GRAD) + bformata(glsl, "%sGrad%s%s(", funcName, ext, offset); + else + bformata(glsl, "%s%s%s(", funcName, ext, offset); + } + + if (psContext->IsVulkan()) + { + // Build the sampler name here + std::string samplerType = GetSamplerType(psContext, eResDim, psSrcTex->ui32RegisterNumber); + const ResourceBinding *pSmpRes = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, &pSmpRes); + + if (pSmpRes->m_SamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON) + samplerType.append("Shadow"); + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, 0); + std::string smpName = ResourceName(psContext, RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, 0); + bformata(glsl, "%s(%s, %s)", samplerType.c_str(), texName.c_str(), smpName.c_str()); + } + else + { + // Sampler name + if (!useCombinedTextureSamplers) + ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + else + bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); + } + 
bcatcstr(glsl, ", "); + + // Texture coordinates, either from previously constructed temp + // or straight from the psDestAddr operand + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + (eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && !(ui32Flags & TEXSMP_FLAG_GATHER))) + bformata(glsl, "txVec%d", uniqueNameCounter); + else + TranslateTexCoord(eResDim, psDestAddr); + + // If depth compare reference was not embedded to texcoord + // then insert it here as a separate param + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + (eResDim == RESOURCE_DIMENSION_TEXTURECUBEARRAY || (ui32Flags & TEXSMP_FLAG_GATHER))) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + } + + // Add LOD/grad parameters based on the flags + if (needsLodWorkaround) + { + bcatcstr(glsl, ", vec2(0.0, 0.0), vec2(0.0, 0.0)"); + } + else if (ui32Flags & TEXSMP_FLAG_LOD) + { + if (!needsLodWorkaroundES2) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } + } + } + else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) + { + if (!needsLodWorkaroundES2) + bcatcstr(glsl, ", 0.0"); + } + else if (ui32Flags & TEXSMP_FLAG_GRAD) + { + bcatcstr(glsl, ", vec4("); + TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ", vec4("); + TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + } + + // Add offset param + if (psInst->bAddressOffset) + { + if (ui32NumOffsets == 1) + { + bformata(glsl, ", %d", + psInst->iUAddrOffset); + } + else if (ui32NumOffsets == 2) + { + bformata(glsl, ", ivec2(%d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + } + else if (ui32NumOffsets == 3) + { + bformata(glsl, ", ivec3(%d, %d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + } + } + // HLSL gather has a variant with separate offset 
operand + else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) + { + uint32_t mask = OPERAND_4_COMPONENT_MASK_X; + if (ui32NumOffsets > 1) + mask |= OPERAND_4_COMPONENT_MASK_Y; + if (ui32NumOffsets > 2) + mask |= OPERAND_4_COMPONENT_MASK_Z; + + bcatcstr(glsl, ","); + TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); + } + + // Add bias if present + if (ui32Flags & TEXSMP_FLAG_BIAS) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT); + } + + // Add texture gather component selection if needed + if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) + { + ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); + if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) + { + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) + { + bformata(glsl, ", %d", psSrcSamp->aui32Swizzle[0]); + } + else + { + // Component selection not supported with depth compare gather + } + } + } + + bcatcstr(glsl, ")"); + + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) + { + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psSrcTex->iWriteMaskEnabled = 1; + TranslateOperandSwizzleWithMask(psContext, psSrcTex, psDest->GetAccessMask(), 0); + } + AddAssignPrologue(numParenthesis); } const char* swizzleString[] = { ".x", ".y", ".z", ".w" }; @@ -1396,2987 +1379,2995 @@ const char* swizzleString[] = { ".x", ".y", ".z", ".w" }; // TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
void ToGLSL::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) { - bstring glsl = *psContext->currentGLSLString; - ASSERT(psVarType->Class == SVC_VECTOR); - - bcatcstr(glsl, "["); // Access vector component with [] notation - if (offset > 0) - bcatcstr(glsl, "("); - - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - { - // The var containing byte address to the requested element - TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); - - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %du)", offset); // Subtract that first - - bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); - } - else - { - // The var containing byte address to the requested element - TranslateOperand(psByteAddr, TO_FLAG_INTEGER, mask); - - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %d)", offset); // Subtract that first - - bcatcstr(glsl, " >> 0x2"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); - } + bstring glsl = *psContext->currentGLSLString; + ASSERT(psVarType->Class == SVC_VECTOR); + + bcatcstr(glsl, "["); // Access vector component with [] notation + if (offset > 0) + bcatcstr(glsl, "("); + + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %du)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); + } + else + { + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_INTEGER, mask); + + if (offset > 
0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %d)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); + } } void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) { - bstring glsl = *psContext->currentGLSLString; - int component; - int srcComponent = 0; - - Operand* psDest = 0; - Operand* psDestAddr = 0; - Operand* psDestByteOff = 0; - Operand* psSrc = 0; - - switch (psInst->eOpcode) - { - case OPCODE_STORE_STRUCTURED: - psDest = &psInst->asOperands[0]; - psDestAddr = &psInst->asOperands[1]; - psDestByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - break; - case OPCODE_STORE_RAW: - psDest = &psInst->asOperands[0]; - psDestByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) - dstOffFlag = TO_FLAG_INTEGER; - - for (component = 0; component < 4; component++) - { - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - if (psInst->asOperands[0].ui32CompMask & (1 << component)) - { - psContext->AddIndentation(); - - TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - - if (psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - bcatcstr(glsl, "_buf"); - - if (psDestAddr) - { - bcatcstr(glsl, "["); - TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, "].value"); - } - - bcatcstr(glsl, "[("); - TranslateOperand(psDestByteOff, dstOffFlag); - bcatcstr(glsl, " >> 2"); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - bcatcstr(glsl, ")"); - - if (component != 0) - { - bformata(glsl, " 
+ %d", component); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - - bcatcstr(glsl, "]"); - - //Dest type is currently always a uint array. - bcatcstr(glsl, " = "); - if (psSrc->GetNumSwizzleElements() > 1) - TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++)); - else - TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - - bcatcstr(glsl, ";\n"); - } - } + bstring glsl = *psContext->currentGLSLString; + int component; + int srcComponent = 0; + + Operand* psDest = 0; + Operand* psDestAddr = 0; + Operand* psDestByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_STORE_STRUCTURED: + psDest = &psInst->asOperands[0]; + psDestAddr = &psInst->asOperands[1]; + psDestByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_STORE_RAW: + psDest = &psInst->asOperands[0]; + psDestByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) + dstOffFlag = TO_FLAG_INTEGER; + + for (component = 0; component < 4; component++) + { + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + if (psInst->asOperands[0].ui32CompMask & (1 << component)) + { + psContext->AddIndentation(); + + TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + + if (psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + if (psDestAddr) + { + bcatcstr(glsl, "["); + TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, "].value"); + } + + bcatcstr(glsl, "[("); + TranslateOperand(psDestByteOff, dstOffFlag); + bcatcstr(glsl, " >> 2"); + if (dstOffFlag == 
TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + bcatcstr(glsl, ")"); + + if (component != 0) + { + bformata(glsl, " + %d", component); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + + bcatcstr(glsl, "]"); + + uint32_t srcFlag = TO_FLAG_UNSIGNED_INTEGER; + if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psDest)) + srcFlag = TO_FLAG_INTEGER; + + bcatcstr(glsl, " = "); + if (psSrc->GetNumSwizzleElements() > 1) + TranslateOperand(psSrc, srcFlag, 1 << (srcComponent++)); + else + TranslateOperand(psSrc, srcFlag, OPERAND_4_COMPONENT_MASK_X); + + bcatcstr(glsl, ";\n"); + } + } } + void ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) { - bstring glsl = *psContext->currentGLSLString; - int component; - Operand* psDest = 0; - Operand* psSrcAddr = 0; - Operand* psSrcByteOff = 0; - Operand* psSrc = 0; - - switch (psInst->eOpcode) - { - case OPCODE_LD_STRUCTURED: - psDest = &psInst->asOperands[0]; - psSrcAddr = &psInst->asOperands[1]; - psSrcByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - break; - case OPCODE_LD_RAW: - psDest = &psInst->asOperands[0]; - psSrcByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t destCount = psDest->GetNumSwizzleElements(); - uint32_t destMask = psDest->GetAccessMask(); - - int numParenthesis = 0; - int firstItemAdded = 0; - SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); - uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) - srcOffFlag = TO_FLAG_INTEGER; - - psContext->AddIndentation(); - AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); //TODO check this out? 
- if (destCount > 1) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, destDataType, destCount, false)); - numParenthesis++; - } - for (component = 0; component < 4; component++) - { - int addedBitcast = 0; - if (!(destMask & (1 << component))) - continue; - - if (firstItemAdded) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; - - // always uint array atm - if (destDataType == SVT_FLOAT) - { - if (HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "uintBitsToFloat("); - else - bcatcstr(glsl, "float("); - addedBitcast = 1; - } - else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) - { - bcatcstr(glsl, "int("); - addedBitcast = 1; - } - - TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - - if (psSrc->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - bcatcstr(glsl, "_buf"); - - if (psSrcAddr) - { - bcatcstr(glsl, "["); - TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); - bcatcstr(glsl, "].value"); - } - bcatcstr(glsl, "[("); - TranslateOperand(psSrcByteOff, srcOffFlag); - bcatcstr(glsl, " >> 2"); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bcatcstr(glsl, "]"); - - if (addedBitcast) - bcatcstr(glsl, ")"); - } - AddAssignPrologue(numParenthesis); + bstring glsl = *psContext->currentGLSLString; + int component; + Operand* psDest = 0; + Operand* psSrcAddr = 0; + Operand* psSrcByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_LD_STRUCTURED: + psDest = &psInst->asOperands[0]; + psSrcAddr = &psInst->asOperands[1]; + psSrcByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_LD_RAW: + psDest = &psInst->asOperands[0]; + psSrcByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t destCount = psDest->GetNumSwizzleElements(); + uint32_t destMask = psDest->GetAccessMask(); + + int numParenthesis = 0; + int firstItemAdded = 0; + SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); + uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) + srcOffFlag = TO_FLAG_INTEGER; + + psContext->AddIndentation(); + AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); //TODO check this out? 
+ if (destCount > 1) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, destDataType, destCount, false)); + numParenthesis++; + } + for (component = 0; component < 4; component++) + { + int addedBitcast = 0; + if (!(destMask & (1 << component))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + // always uint array atm + if (destDataType == SVT_FLOAT) + { + if (HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uintBitsToFloat("); + else + bcatcstr(glsl, "float("); + addedBitcast = 1; + } + else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) + { + bcatcstr(glsl, "int("); + addedBitcast = 1; + } + + TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + + if (psSrc->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + if (psSrcAddr) + { + bcatcstr(glsl, "["); + TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); + bcatcstr(glsl, "].value"); + } + bcatcstr(glsl, "[("); + TranslateOperand(psSrcByteOff, srcOffFlag); + bcatcstr(glsl, " >> 2"); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + + if (addedBitcast) + bcatcstr(glsl, ")"); + } + AddAssignPrologue(numParenthesis); } void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) { - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; - const char* func = ""; - Operand* dest = 0; - Operand* previousValue = 0; - Operand* destAddr = 0; - Operand* src = 0; - Operand* compare = 0; - int texDim = 0; - bool isUint = true; - - switch (psInst->eOpcode) - { - case OPCODE_IMM_ATOMIC_IADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); -#endif - func = "Add"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IADD\n"); -#endif - func = "Add"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_AND: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); -#endif - func = "And"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_AND: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_AND\n"); -#endif - func = "And"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_OR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); -#endif - func = "Or"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = 
&psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_OR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_OR\n"); -#endif - func = "Or"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_XOR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); -#endif - func = "Xor"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_XOR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_XOR\n"); -#endif - func = "Xor"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - - case OPCODE_IMM_ATOMIC_EXCH: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); -#endif - func = "Exchange"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); -#endif - func = "CompSwap"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - compare = &psInst->asOperands[3]; - src = &psInst->asOperands[4]; - break; - } - case OPCODE_ATOMIC_CMP_STORE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); -#endif - func = "CompSwap"; - previousValue = 0; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - compare = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_UMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); -#endif - func = "Min"; - previousValue = 
&psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMIN\n"); -#endif - func = "Min"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_IMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); -#endif - func = "Min"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMIN\n"); -#endif - func = "Min"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_UMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); -#endif - func = "Max"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMAX\n"); -#endif - func = "Max"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_IMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); -#endif - func = "Max"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMAX\n"); -#endif - func = "Max"; - dest = &psInst->asOperands[0]; - destAddr = 
&psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - default: - ASSERT(0); - break; - } - - psContext->AddIndentation(); - - if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - { - const ResourceBinding* psBinding = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); - - if (psBinding->eType == RTYPE_UAV_RWTYPED) - { - isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); - - // Find out if it's texture and of what dimension - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - texDim = 1; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - texDim = 2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - texDim = 3; - break; - default: - ASSERT(0); - break; - } - } - } - - if (isUint && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; - else - ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; - - if (previousValue) - AddAssignToDest(previousValue, isUint ? 
SVT_UINT : SVT_INT, 1, &numParenthesis); - - if (texDim > 0) - bcatcstr(glsl, "imageAtomic"); - else - bcatcstr(glsl, "atomic"); - - bcatcstr(glsl, func); - bcatcstr(glsl, "("); - - TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - if (texDim > 0) - { - bcatcstr(glsl, ", "); - unsigned int compMask = OPERAND_4_COMPONENT_MASK_X; - if (texDim >= 2) - compMask |= OPERAND_4_COMPONENT_MASK_Y; - if (texDim == 3) - compMask |= OPERAND_4_COMPONENT_MASK_Z; - - TranslateOperand(destAddr, TO_FLAG_INTEGER, compMask); - } - else - { - if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - bcatcstr(glsl, "_buf"); - - uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) - destAddrFlag = TO_FLAG_INTEGER; - - bcatcstr(glsl, "["); - TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); - - // Structured buf if we have both x & y swizzles. 
Raw buf has only x -> no .value[] - if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) - { - bcatcstr(glsl, "]"); - - bcatcstr(glsl, ".value["); - TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); - } - - bcatcstr(glsl, " >> 2");//bytes to floats - if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bcatcstr(glsl, "]"); - } - - bcatcstr(glsl, ", "); - - if (compare) - { - TranslateOperand(compare, ui32DataTypeFlag); - bcatcstr(glsl, ", "); - } - - TranslateOperand(src, ui32DataTypeFlag); - bcatcstr(glsl, ")"); - if (previousValue) - { - AddAssignPrologue(numParenthesis); - } - else - bcatcstr(glsl, ";\n"); + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + uint32_t ui32DstDataTypeFlag = TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY; + uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; + const char* func = ""; + Operand* dest = 0; + Operand* previousValue = 0; + Operand* destAddr = 0; + Operand* src = 0; + Operand* compare = 0; + int texDim = 0; + bool isUint = true; + + switch (psInst->eOpcode) + { + case OPCODE_IMM_ATOMIC_IADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); +#endif + func = "Add"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); +#endif + func = "Add"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_AND: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); +#endif + func = "And"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_AND: 
+ { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); +#endif + func = "And"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_OR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); +#endif + func = "Or"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_OR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); +#endif + func = "Or"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); +#endif + func = "Xor"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_XOR\n"); +#endif + func = "Xor"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + + case OPCODE_IMM_ATOMIC_EXCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); +#endif + func = "Exchange"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); +#endif + func = "CompSwap"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + compare = &psInst->asOperands[3]; + src = &psInst->asOperands[4]; + break; + } + case 
OPCODE_ATOMIC_CMP_STORE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); +#endif + func = "CompSwap"; + previousValue = 0; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + compare = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); +#endif + func = "Min"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); +#endif + func = "Min"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); +#endif + func = "Min"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); +#endif + func = "Min"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); +#endif + func = "Max"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); +#endif + func = "Max"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case 
OPCODE_IMM_ATOMIC_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); +#endif + func = "Max"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); +#endif + func = "Max"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + { + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); + + if (psBinding->eType == RTYPE_UAV_RWTYPED) + { + isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); + + // Find out if it's texture and of what dimension + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + texDim = 1; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = 2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = 3; + break; + default: + ASSERT(0); + break; + } + } + else if (psBinding->eType == RTYPE_UAV_RWSTRUCTURED) + { + if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, dest)) + { + isUint = false; + ui32DstDataTypeFlag |= TO_FLAG_INTEGER; + } + } + } + + if (isUint && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; + else + 
ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; + + if (previousValue) + AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis); + + if (texDim > 0) + bcatcstr(glsl, "imageAtomic"); + else + bcatcstr(glsl, "atomic"); + + bcatcstr(glsl, func); + bcatcstr(glsl, "("); + + TranslateOperand(dest, ui32DstDataTypeFlag); + + if (texDim > 0) + { + bcatcstr(glsl, ", "); + unsigned int compMask = OPERAND_4_COMPONENT_MASK_X; + if (texDim >= 2) + compMask |= OPERAND_4_COMPONENT_MASK_Y; + if (texDim == 3) + compMask |= OPERAND_4_COMPONENT_MASK_Z; + + TranslateOperand(destAddr, TO_FLAG_INTEGER, compMask); + } + else + { + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) + destAddrFlag = TO_FLAG_INTEGER; + + bcatcstr(glsl, "["); + TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); + + // Structured buf if we have both x & y swizzles. 
Raw buf has only x -> no .value[] + if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) + { + bcatcstr(glsl, "]"); + + bcatcstr(glsl, ".value["); + TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); + } + + bcatcstr(glsl, " >> 2");//bytes to floats + if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + } + + bcatcstr(glsl, ", "); + + if (compare) + { + TranslateOperand(compare, ui32DataTypeFlag); + bcatcstr(glsl, ", "); + } + + TranslateOperand(src, ui32DataTypeFlag); + bcatcstr(glsl, ")"); + if (previousValue) + { + AddAssignPrologue(numParenthesis); + } + else + bcatcstr(glsl, ";\n"); } void ToGLSL::TranslateConditional( - Instruction* psInst, - bstring glsl) + Instruction* psInst, + bstring glsl) { - const char* statement = ""; - if (psInst->eOpcode == OPCODE_BREAKC) - { - statement = "break"; - } - else if (psInst->eOpcode == OPCODE_CONTINUEC) - { - statement = "continue"; - } - else if (psInst->eOpcode == OPCODE_RETC) // FIXME! 
Need to spew out shader epilogue - { - statement = "return"; - } - - if (psInst->m_IsStaticBranch) - { - // Instead of the actual condition, use the specialization constant instead - - // But first we'll have to make sure the original values don't get dropped out (we rely on glslang not being very smart) - bcatcstr(glsl, "if(false)\n {\n"); - } - - SHADER_VARIABLE_TYPE argType = psInst->asOperands[0].GetDataType(psContext); - if (argType == SVT_BOOL) - { - bcatcstr(glsl, "if("); - if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) - bcatcstr(glsl, "!"); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, "){%s;}\n", statement); - } - else - { - bcatcstr(glsl, "){\n"); - } - } - else - { - uint32_t oFlag = TO_FLAG_UNSIGNED_INTEGER; - bool isInt = false; - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) - { - isInt = true; - oFlag = TO_FLAG_INTEGER; - } - - bcatcstr(glsl, "if("); - TranslateOperand(&psInst->asOperands[0], oFlag); - - if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - bcatcstr(glsl, " == "); - else - bcatcstr(glsl, " != "); - - if (isInt) - bcatcstr(glsl, "0)"); - else - bcatcstr(glsl, "uint(0u))"); - - - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, " {%s;}\n", statement); - } - else - { - bcatcstr(glsl, " {\n"); - } - } - if (psInst->m_IsStaticBranch) - { - if (psInst->eOpcode == OPCODE_IF) - { - bcatcstr(glsl, "}\n}\n"); - } - else - { - bcatcstr(glsl, "}\n"); - } - bcatcstr(glsl, "if("); - if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) - bcatcstr(glsl, "!"); - bcatcstr(glsl, psInst->m_StaticBranchName.c_str()); - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, "){%s;}\n", statement); - } - else - { - bcatcstr(glsl, "){\n"); - } - return; - - } - + const char* statement = ""; + if (psInst->eOpcode == OPCODE_BREAKC) + { + statement = "break"; + } + else if 
(psInst->eOpcode == OPCODE_CONTINUEC) + { + statement = "continue"; + } + else if (psInst->eOpcode == OPCODE_RETC) // FIXME! Need to spew out shader epilogue + { + statement = "return"; + } + + if (psInst->m_IsStaticBranch) + { + // Instead of the actual condition, use the specialization constant instead + + // But first we'll have to make sure the original values don't get dropped out (we rely on glslang not being very smart) + bcatcstr(glsl, "if(false)\n {\n"); + } + + SHADER_VARIABLE_TYPE argType = psInst->asOperands[0].GetDataType(psContext); + if (argType == SVT_BOOL) + { + bcatcstr(glsl, "if("); + if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) + bcatcstr(glsl, "!"); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, "){%s;}\n", statement); + } + else + { + bcatcstr(glsl, "){\n"); + } + } + else + { + uint32_t oFlag = TO_FLAG_UNSIGNED_INTEGER; + bool isInt = false; + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) + { + isInt = true; + oFlag = TO_FLAG_INTEGER; + } + + bcatcstr(glsl, "if("); + TranslateOperand(&psInst->asOperands[0], oFlag); + + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + bcatcstr(glsl, " == "); + else + bcatcstr(glsl, " != "); + + bcatcstr(glsl, isInt ? "0)" : "uint(0))"); // Old ES3.0 Adrenos treat 0u as const int. 
+ + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, " {%s;}\n", statement); + } + else + { + bcatcstr(glsl, " {\n"); + } + } + if (psInst->m_IsStaticBranch) + { + if (psInst->eOpcode == OPCODE_IF) + { + bcatcstr(glsl, "}\n}\n"); + } + else + { + bcatcstr(glsl, "}\n"); + } + bcatcstr(glsl, "if("); + if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) + bcatcstr(glsl, "!"); + bcatcstr(glsl, psInst->m_StaticBranchName.c_str()); + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, "){%s;}\n", statement); + } + else + { + bcatcstr(glsl, "){\n"); + } + return; + } } void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = false */) { - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); - const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0); - - if (!isEmbedded) - { + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); + const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0); + if (!isEmbedded) + { #ifdef _DEBUG - // Uncomment to print instruction IDs - //psContext->AddIndentation(); - //bformata(glsl, "//Instruction %d\n", psInst->id); + // Uncomment to print instruction IDs + //psContext->AddIndentation(); + //bformata(glsl, "//Instruction %d\n", psInst->id); #if 0 - if (psInst->id == 73) - { - ASSERT(1); //Set breakpoint here to debug an instruction from its ID. - } -#endif -#endif - - if (psInst->m_SkipTranslation) - return; - } - - switch (psInst->eOpcode) - { - case OPCODE_FTOI: - case OPCODE_FTOU: - { - uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? 
SVT_UINT : SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_FTOU) - bcatcstr(glsl, "//FTOU\n"); - else - bcatcstr(glsl, "//FTOI\n"); -#endif - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_SINT_16: - castType = SVT_INT16; - ASSERT(psInst->eOpcode == OPCODE_FTOI); - break; - case OPERAND_MIN_PRECISION_UINT_16: - castType = SVT_UINT16; - ASSERT(psInst->eOpcode == OPCODE_FTOU); - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); - bcatcstr(glsl, "("); // 1 - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_MOV: - { -#ifdef _DEBUG - if (!isEmbedded) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MOV\n"); - } -#endif - if(!isEmbedded) - psContext->AddIndentation(); - - AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], isEmbedded); - break; - } - case OPCODE_ITOF://signed to float - case OPCODE_UTOF://unsigned to float - { - SHADER_VARIABLE_TYPE castType = SVT_FLOAT; - uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_ITOF) - { - bcatcstr(glsl, "//ITOF\n"); - } - else - { - bcatcstr(glsl, "//UTOF\n"); - } -#endif - - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - castType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - castType = SVT_FLOAT16; - break; - default: - ASSERT(0); // We'd 
be doing bitcasts into low/mediump ints, not good. - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); - bcatcstr(glsl, "("); // 1 - TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_MAD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAD\n"); -#endif - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); - break; - } - case OPCODE_IMAD: - { - uint32_t ui32Flags = TO_FLAG_INTEGER; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAD\n"); -#endif - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - ui32Flags = TO_FLAG_UNSIGNED_INTEGER; - } - - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); - break; - } - case OPCODE_DADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DADD\n"); -#endif - CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); - break; - } - case OPCODE_IADD: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - if (!isEmbedded) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IADD\n"); - } -#endif - //Is this a signed or unsigned add? 
- if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - CallBinaryOp("+", psInst, 0, 1, 2, eType, isEmbedded); - break; - } - case OPCODE_ADD: - { - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ADD\n"); -#endif - CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_OR: - { - /*Todo: vector version */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//OR\n"); -#endif - uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - if (dstSwizCount == 1) - { - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " || "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else - { - // Do component-wise and, glsl doesn't support || on bvecs - for (uint32_t k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) == 0) - continue; - - int needsParenthesis = 0; - psContext->AddIndentation(); - // Override dest mask temporarily - psInst->asOperands[0].ui32CompMask = (1 << k); - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); - bcatcstr(glsl, " || "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); - AddAssignPrologue(needsParenthesis); - - } - // Restore old mask - psInst->asOperands[0].ui32CompMask = destMask; - } - - } - else - CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_AND: - { - SHADER_VARIABLE_TYPE eA = 
psInst->asOperands[1].GetDataType(psContext); - SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//AND\n"); -#endif - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - if (dstSwizCount == 1) - { - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " && "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else - { - // Do component-wise and, glsl doesn't support && on bvecs - for (uint32_t k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) == 0) - continue; - - int needsParenthesis = 0; - psContext->AddIndentation(); - // Override dest mask temporarily - psInst->asOperands[0].ui32CompMask = (1 << k); - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); - bcatcstr(glsl, " && "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); - AddAssignPrologue(needsParenthesis); - - } - // Restore old mask - psInst->asOperands[0].ui32CompMask = destMask; - } - } - else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) - { - int boolOp = eA == SVT_BOOL ? 1 : 2; - int otherOp = eA == SVT_BOOL ? 
2 : 1; - int needsParenthesis = 0; - uint32_t i; - psContext->AddIndentation(); - - if (dstSwizCount == 1) - { - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " ? "); - TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, " : "); - - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) - { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - - } - } - bcatcstr(glsl, ")"); - } - else if (eDataType == SVT_FLOAT) - { - // We can use mix() - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); - bcatcstr(glsl, "mix("); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) - { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - - } - } - bcatcstr(glsl, "), "); - TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, ", "); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ")"); - bcatcstr(glsl, ")"); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); - bcatcstr(glsl, "("); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_UINT, dstSwizCount, false)); - bcatcstr(glsl, "("); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - if 
(HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, ") * 0xffffffffu) & "); - else - bcatcstr(glsl, ") * 0xffffffff) & "); - TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); - } - - AddAssignPrologue(needsParenthesis); - } - else - { - CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); - } - - break; - } - case OPCODE_GE: - { - /* - dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); - Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. - */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GE\n"); -#endif - AddComparison(psInst, CMP_GE, TO_FLAG_NONE); - break; - } - case OPCODE_MUL: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MUL\n"); -#endif - CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_IMUL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMUL\n"); -#endif - if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); - - CallBinaryOp("*", psInst, 1, 2, 3, eType); - break; - } - case OPCODE_UDIV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UDIV\n"); -#endif - //destQuotient, destRemainder, src0, src1 - - // There are cases where destQuotient is the same variable as src0 or src1. If that happens, - // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
- if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) - && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) - { - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - } - else - { - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - } - break; - } - case OPCODE_DIV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DIV\n"); -#endif - CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_SINCOS: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SINCOS\n"); -#endif - // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value - if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && - psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) - { - // sin() result overwrites source, do cos() first. - // The case where both write the src shouldn't really happen anyway. 
- if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1( - "sin", psInst, 0, 2, 1); - } - } - else - { - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1("sin", psInst, 0, 2, 1); - } - - if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - } - break; - } - - case OPCODE_DP2: - { - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); - bcatcstr(glsl, "dot("); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP3: - { - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP3\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); - bcatcstr(glsl, "dot("); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP4: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP4\n"); -#endif - CallHelper2("dot", psInst, 0, 1, 2, 0); - break; - } - case OPCODE_INE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INE\n"); -#endif - AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); - break; - } - case OPCODE_NE: - { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//NE\n"); -#endif - AddComparison(psInst, CMP_NE, TO_FLAG_NONE); - break; - } - case OPCODE_IGE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IGE\n"); -#endif - AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); - break; - } - case OPCODE_ILT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ILT\n"); -#endif - AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); - break; - } - case OPCODE_LT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LT\n"); -#endif - AddComparison(psInst, CMP_LT, TO_FLAG_NONE); - break; - } - case OPCODE_IEQ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IEQ\n"); -#endif - AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); - break; - } - case OPCODE_ULT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ULT\n"); -#endif - AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_UGE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UGE\n"); -#endif - AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_MOVC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MOVC\n"); -#endif - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); - break; - } - case OPCODE_SWAPC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWAPC\n"); -#endif - // TODO needs temps!! 
- AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); - AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); - break; - } - - case OPCODE_LOG: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOG\n"); -#endif - CallHelper1("log2", psInst, 0, 1, 1); - break; - } - case OPCODE_RSQ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RSQ\n"); -#endif - CallHelper1("inversesqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_EXP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EXP\n"); -#endif - CallHelper1("exp2", psInst, 0, 1, 1); - break; - } - case OPCODE_SQRT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SQRT\n"); -#endif - CallHelper1("sqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_PI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_PI\n"); -#endif - CallHelper1("ceil", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NI\n"); -#endif - CallHelper1("floor", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_Z: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_Z\n"); -#endif - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - UseExtraFunctionDependency("trunc"); - - CallHelper1("trunc", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NE\n"); -#endif - - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - UseExtraFunctionDependency("roundEven"); - - CallHelper1("roundEven", psInst, 0, 1, 1); - break; - } - case OPCODE_FRC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FRC\n"); -#endif - CallHelper1("fract", psInst, 0, 1, 1); - break; - } - case OPCODE_IMAX: - { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//IMAX\n"); -#endif - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("max", psInst, 0, 1, 2, 1); - else - CallHelper2Int("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMAX\n"); -#endif - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("max", psInst, 0, 1, 2, 1); - else - CallHelper2UInt("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAX\n"); -#endif - CallHelper2("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_IMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMIN\n"); -#endif - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("min", psInst, 0, 1, 2, 1); - else - CallHelper2Int("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMIN\n"); -#endif - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("min", psInst, 0, 1, 2, 1); - else - CallHelper2UInt("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MIN\n"); -#endif - CallHelper2("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_GATHER4: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); - break; - } - case OPCODE_GATHER4_PO_C: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO_C\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_GATHER4_PO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | 
TEXSMP_FLAG_PARAMOFFSET); - break; - } - case OPCODE_GATHER4_C: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_C\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE\n"); + if (psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + } +#endif +#endif + + if (psInst->m_SkipTranslation) + return; + } + + switch (psInst->eOpcode) + { + case OPCODE_FTOI: + case OPCODE_FTOU: + { + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); +#endif + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + castType = SVT_INT16; + ASSERT(psInst->eOpcode == OPCODE_FTOI); + break; + case OPERAND_MIN_PRECISION_UINT_16: + castType = SVT_UINT16; + ASSERT(psInst->eOpcode == OPCODE_FTOU); + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. 
+ } + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); + bcatcstr(glsl, "("); // 1 + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_MOV: + { +#ifdef _DEBUG + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); + } +#endif + if (!isEmbedded) + psContext->AddIndentation(); + + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], isEmbedded); + break; + } + case OPCODE_ITOF://signed to float + case OPCODE_UTOF://unsigned to float + { + SHADER_VARIABLE_TYPE castType = SVT_FLOAT; + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + { + bcatcstr(glsl, "//ITOF\n"); + } + else + { + bcatcstr(glsl, "//UTOF\n"); + } +#endif + + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + castType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + castType = SVT_FLOAT16; + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); + bcatcstr(glsl, "("); // 1 + TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? 
TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_MAD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); +#endif + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); + break; + } + case OPCODE_IMAD: + { + uint32_t ui32Flags = TO_FLAG_INTEGER; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + ui32Flags = TO_FLAG_UNSIGNED_INTEGER; + } + + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } + case OPCODE_DADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); + break; + } + case OPCODE_IADD: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); + } +#endif + //Is this a signed or unsigned add? 
+ if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + CallBinaryOp("+", psInst, 0, 1, 2, eType, isEmbedded); + break; + } + case OPCODE_ADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_OR: + { + /*Todo: vector version */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); +#endif + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + if (dstSwizCount == 1) + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " || "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + { + // Do component-wise and, glsl doesn't support || on bvecs + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + psContext->AddIndentation(); + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " || "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); + AddAssignPrologue(needsParenthesis); + } + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; + } + } + else + CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_AND: + { + SHADER_VARIABLE_TYPE eA = 
psInst->asOperands[1].GetDataType(psContext); + SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); +#endif + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + if (dstSwizCount == 1) + { + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " && "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + { + // Do component-wise and, glsl doesn't support && on bvecs + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + psContext->AddIndentation(); + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " && "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); + AddAssignPrologue(needsParenthesis); + } + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; + } + } + else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) + { + int boolOp = eA == SVT_BOOL ? 1 : 2; + int otherOp = eA == SVT_BOOL ? 
2 : 1; + int needsParenthesis = 0; + uint32_t i; + psContext->AddIndentation(); + + if (dstSwizCount == 1) + { + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " ? "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + } + } + bcatcstr(glsl, ")"); + } + else if (eDataType == SVT_FLOAT) + { + // We can use mix() + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "mix("); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + } + } + bcatcstr(glsl, "), "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, ", "); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ")"); + bcatcstr(glsl, ")"); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "("); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_UINT, dstSwizCount, false)); + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + if 
(HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") * 0xFFFFFFFFu) & "); + else + bcatcstr(glsl, ") * -1) & "); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); + } + + break; + } + case OPCODE_GE: + { + /* + dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); + Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. + */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_NONE); + break; + } + case OPCODE_MUL: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); +#endif + CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_IMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); +#endif + if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp("*", psInst, 1, 2, 3, eType); + break; + } + case OPCODE_UDIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); +#endif + //destQuotient, destRemainder, src0, src1 + + // There are cases where destQuotient is the same variable as src0 or src1. If that happens, + // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
+ if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) + && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) + { + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + } + else + { + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + } + break; + } + case OPCODE_DIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); +#endif + CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_SINCOS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); +#endif + // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value + if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && + psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) + { + // sin() result overwrites source, do cos() first. + // The case where both write the src shouldn't really happen anyway. 
+ if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1( + "sin", psInst, 0, 2, 1); + } + } + else + { + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1("sin", psInst, 0, 2, 1); + } + + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + } + break; + } + + case OPCODE_DP2: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP3: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); +#endif + CallHelper2("dot", psInst, 0, 1, 2, 0); + break; + } + case OPCODE_INE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); + break; + } + case OPCODE_NE: + { +#ifdef _DEBUG + 
psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_NONE); + break; + } + case OPCODE_IGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); + break; + } + case OPCODE_ILT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); + break; + } + case OPCODE_LT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_NONE); + break; + } + case OPCODE_IEQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); + break; + } + case OPCODE_ULT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_UGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_MOVC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); +#endif + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); + break; + } + case OPCODE_SWAPC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); +#endif + // TODO needs temps!! 
+ AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); + break; + } + + case OPCODE_LOG: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); +#endif + CallHelper1("log2", psInst, 0, 1, 1); + break; + } + case OPCODE_RSQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); +#endif + CallHelper1("inversesqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_EXP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); +#endif + CallHelper1("exp2", psInst, 0, 1, 1); + break; + } + case OPCODE_SQRT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); +#endif + CallHelper1("sqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_PI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); +#endif + CallHelper1("ceil", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); +#endif + CallHelper1("floor", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_Z: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); +#endif + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + UseExtraFunctionDependency("trunc"); + + CallHelper1("trunc", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); +#endif + + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + UseExtraFunctionDependency("roundEven"); + + CallHelper1("roundEven", psInst, 0, 1, 1); + break; + } + case OPCODE_FRC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); +#endif + CallHelper1("fract", psInst, 0, 1, 1); + break; + } + case OPCODE_IMAX: + { +#ifdef _DEBUG + 
psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); +#endif + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("max", psInst, 0, 1, 2, 1); + else + CallHelper2Int("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); +#endif + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("max", psInst, 0, 1, 2, 1); + else + CallHelper2UInt("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); +#endif + CallHelper2("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); +#endif + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("min", psInst, 0, 1, 2, 1); + else + CallHelper2Int("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); +#endif + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("min", psInst, 0, 1, 2, 1); + else + CallHelper2UInt("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); +#endif + CallHelper2("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_GATHER4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); + break; + } + case OPCODE_GATHER4_PO_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_GATHER4_PO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | 
TEXSMP_FLAG_PARAMOFFSET); + break; + } + case OPCODE_GATHER4_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); #endif - TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); - break; - } - case OPCODE_SAMPLE_L: - { + TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); + break; + } + case OPCODE_SAMPLE_L: + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_L\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); #endif - TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); - break; - } - case OPCODE_SAMPLE_C: - { + TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); + break; + } + case OPCODE_SAMPLE_C: + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); #endif - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE_C_LZ: - { + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE_C_LZ: + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C_LZ\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); #endif - - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); - break; - } - case OPCODE_SAMPLE_D: - { + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); + break; + } + case OPCODE_SAMPLE_D: + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_D\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); #endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); - break; - } - case OPCODE_SAMPLE_B: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_B\n"); + 
TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); + break; + } + case OPCODE_SAMPLE_B: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); #endif - TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); - break; - } - case OPCODE_RET: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RET\n"); -#endif - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { + TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); + break; + } + case OPCODE_RET: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif - } - psContext->AddIndentation(); - bcatcstr(glsl, "return;\n"); - break; - } - case OPCODE_INTERFACE_CALL: - { - const char* name; - ShaderVar* psVar; - uint32_t varFound; - - uint32_t funcPointer; - uint32_t funcBodyIndex; - uint32_t ui32NumBodiesPerTable; - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INTERFACE_CALL\n"); -#endif - - ASSERT(psInst->asOperands[0].eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32); - - funcPointer = psInst->asOperands[0].aui32ArraySizes[0]; - funcBodyIndex = psInst->ui32FuncIndexWithinInterface; - - ui32NumBodiesPerTable = psContext->psShader->funcPointer[funcPointer].ui32NumBodiesPerTable; - - varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(funcPointer, &psVar); - - ASSERT(varFound); - - name = &psVar->name[0]; - - psContext->AddIndentation(); - bcatcstr(glsl, name); - TranslateOperandIndexMAD(&psInst->asOperands[0], 1, ui32NumBodiesPerTable, funcBodyIndex); - //bformata(glsl, "[%d]", funcBodyIndex); - bcatcstr(glsl, "();\n"); - 
break; - } - case OPCODE_LABEL: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LABEL\n"); -#endif - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); //Closing brace ends the previous function. - psContext->AddIndentation(); - - bcatcstr(glsl, "subroutine(SubroutineType)\n"); - bcatcstr(glsl, "void "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, "(){\n"); - ++psContext->indent; - break; - } - case OPCODE_COUNTBITS: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//COUNTBITS\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = bitCount("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_HI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_HI\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = findMSB("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_LO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_LO\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = findLSB("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_SHI: //signed high - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_SHI\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = findMSB("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - 
break; - } - case OPCODE_BFREV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFREV\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = bitfieldReverse("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_BFI: - { - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t numelements_width = psInst->asOperands[1].GetNumSwizzleElements(); - uint32_t numelements_offset = psInst->asOperands[2].GetNumSwizzleElements(); - uint32_t numelements_dest = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); - uint32_t i, j, k; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFI\n"); -#endif - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, &numParenthesis); - - if (numoverall_elements == 1) - bformata(glsl, "int("); - else - bformata(glsl, "ivec%d(", numoverall_elements); - - k = 0; - for (i = 0; i < 4; ++i) - { - if ((destMask & (1 << i)) == 0) - continue; - - k++; - bcatcstr(glsl, "bitfieldInsert("); - - for (j = 4; j >= 1; --j) - { - TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER, 1 << i); - if (j != 1) - bcatcstr(glsl, ","); - } - - bcatcstr(glsl, ") "); - if (k != numoverall_elements) - bcatcstr(glsl, ", "); - } - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_CUT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CUT\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "EndPrimitive();\n"); - break; - } - case OPCODE_EMIT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMIT\n"); -#endif - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif - } - - psContext->AddIndentation(); - bcatcstr(glsl, "EmitVertex();\n"); - break; - } - case OPCODE_EMITTHENCUT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMITTHENCUT\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "EmitVertex();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "EndPrimitive();\n"); - break; - } - - case OPCODE_CUT_STREAM: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CUT_STREAM\n"); -#endif - psContext->AddIndentation(); - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); - if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) - { - // ES geom shaders only support one stream. - bcatcstr(glsl, "EndPrimitive();\n"); - } - else - { - bcatcstr(glsl, "EndStreamPrimitive("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - } - - break; - } - case OPCODE_EMIT_STREAM: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMIT_STREAM\n"); -#endif - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif - } - - psContext->AddIndentation(); - - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); - if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) - { - // ES geom shaders only support one stream. 
- bcatcstr(glsl, "EmitVertex();\n"); - } - else - { - bcatcstr(glsl, "EmitStreamVertex("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - } - break; - } - case OPCODE_EMITTHENCUT_STREAM: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EMITTHENCUT\n"); -#endif - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); - if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) - { - // ES geom shaders only support one stream. - bcatcstr(glsl, "EmitVertex();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "EndPrimitive();\n"); - } - else - { - bcatcstr(glsl, "EmitStreamVertex("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "EndStreamPrimitive("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - } - break; - } - case OPCODE_REP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//REP\n"); -#endif - //Need to handle nesting. 
- //Max of 4 for rep - 'Flow Control Limitations' http://msdn.microsoft.com/en-us/library/windows/desktop/bb219848(v=vs.85).aspx - - psContext->AddIndentation(); - bcatcstr(glsl, "RepCounter = "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ";\n"); - - psContext->AddIndentation(); - bcatcstr(glsl, "while(RepCounter!=0){\n"); - ++psContext->indent; - break; - } - case OPCODE_ENDREP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDREP\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "RepCounter--;\n"); - - --psContext->indent; - - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_LOOP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOOP\n"); -#endif - psContext->AddIndentation(); - - if (psInst->ui32NumOperands == 2) - { - //DX9 version - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_SPECIAL_LOOPCOUNTER); - bcatcstr(glsl, "for("); - bcatcstr(glsl, "LoopCounter = "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, ".y, ZeroBasedCounter = 0;"); - bcatcstr(glsl, "ZeroBasedCounter < "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, ".x;"); - - bcatcstr(glsl, "LoopCounter += "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, ".z, ZeroBasedCounter++){\n"); - ++psContext->indent; - } - else if (psInst->m_LoopInductors[1] != 0) - { - // Can emit as for - uint32_t typeFlags = TO_FLAG_INTEGER; - bcatcstr(glsl, "for("); - if (psInst->m_LoopInductors[0] != 0) - { - if (psInst->m_InductorRegister != 0) - { - // Do declaration here as well - switch (psInst->m_LoopInductors[0]->asOperands[0].GetDataType(psContext)) - { - case SVT_INT: - bcatcstr(glsl, "int "); - break; - case SVT_UINT: - bcatcstr(glsl, "uint "); - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - default: - ASSERT(0); - break; - } - } - 
TranslateInstruction(psInst->m_LoopInductors[0], true); - } - bcatcstr(glsl, " ; "); - bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType != INSTRUCTION_TEST_NONZERO; - bool negateOrder = false; - - // Yet Another NVidia OSX shader compiler bug workaround (really nvidia, get your s#!t together): - // For reasons unfathomable to us, this breaks SSAO effect on OSX (case 756028) - // Broken: for(int ti_loop_1 = int(int(0xFFFFFFFCu)) ; 4 >= ti_loop_1 ; ti_loop_1++) - // Works: for (int ti_loop_1 = int(int(0xFFFFFFFCu)); ti_loop_1 <= 4; ti_loop_1++) - // - // So, check if the first argument is an immediate value, and if so, switch the order or the operands - // (and adjust condition) - if (psInst->m_LoopInductors[1]->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - negateOrder = true; - - const char *cmpOp = ""; - switch (psInst->m_LoopInductors[1]->eOpcode) - { - case OPCODE_IGE: - if(negateOrder) - cmpOp = negateCondition ? ">" : "<="; - else - cmpOp = negateCondition ? "<" : ">="; - break; - case OPCODE_ILT: - if(negateOrder) - cmpOp = negateCondition ? "<=" : ">"; - else - cmpOp = negateCondition ? ">=" : "<"; - break; - case OPCODE_IEQ: - // No need to change the comparison if negateOrder is true - cmpOp = negateCondition ? "!=" : "=="; - if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - case OPCODE_INE: - // No need to change the comparison if negateOrder is true - cmpOp = negateCondition ? "==" : "!="; - if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - case OPCODE_UGE: - if(negateOrder) - cmpOp = negateCondition ? ">" : "<="; - else - cmpOp = negateCondition ? "<" : ">="; - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - case OPCODE_ULT: - if(negateOrder) - cmpOp = negateCondition ? "<=" : ">"; - else - cmpOp = negateCondition ? 
">=" : "<"; - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - - default: - ASSERT(0); - } - TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 2 : 1], typeFlags); - bcatcstr(glsl, cmpOp); - TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 1 : 2], typeFlags); - - bcatcstr(glsl, " ; "); - // One more shortcut: translate IADD tX, tX, 1 to tX++ - if (HLSLcc::IsAddOneInstruction(psInst->m_LoopInductors[3])) - { - TranslateOperand(&psInst->m_LoopInductors[3]->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, "++"); - } - else - TranslateInstruction(psInst->m_LoopInductors[3], true); - - bcatcstr(glsl, ")\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - } - else - { - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - { - bstring name; - name = bformat(HLSLCC_TEMP_PREFIX "i_while_true_%d", m_NumDeclaredWhileTrueLoops++); - - // Workaround limitation with WebGL 1.0 GLSL, as we're expecting something to break the loop in any case - int hardcoded_iteration_limit = 0x7FFFFFFF; - - bformata(glsl, "for(int %s = 0 ; %s < 0x%X ; %s++){\n", name->data, name->data, hardcoded_iteration_limit, name->data); - } - else - { - bcatcstr(glsl, "while(true){\n"); - } - ++psContext->indent; - } - break; - } - case OPCODE_ENDLOOP: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDLOOP\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_BREAK: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAK\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "break;\n"); - break; - } - case OPCODE_BREAKC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAKC\n"); -#endif - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_CONTINUEC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CONTINUEC\n"); -#endif - 
psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_IF: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IF\n"); -#endif - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - ++psContext->indent; - break; - } - case OPCODE_RETC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RETC\n"); -#endif - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_ELSE: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ELSE\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "} else {\n"); - psContext->indent++; - break; - } - case OPCODE_ENDSWITCH: - case OPCODE_ENDIF: - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDIF\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_CONTINUE: - { - psContext->AddIndentation(); - bcatcstr(glsl, "continue;\n"); - break; - } - case OPCODE_DEFAULT: - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "default:\n"); - ++psContext->indent; - break; - } - case OPCODE_NOP: - { - break; - } - case OPCODE_SYNC: - { - const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SYNC\n"); -#endif - - if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) - { - psContext->AddIndentation(); - bcatcstr(glsl, "memoryBarrierShared();\n"); - } - if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) - { - psContext->AddIndentation(); - bcatcstr(glsl, "memoryBarrier();\n"); - } - if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) - { - psContext->AddIndentation(); - bcatcstr(glsl, "barrier();\n"); - } - break; - } - case OPCODE_SWITCH: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWITCH\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, 
"switch(int("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")){\n"); - - psContext->indent += 2; - break; - } - case OPCODE_CASE: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//case\n"); -#endif - psContext->AddIndentation(); - - bcatcstr(glsl, "case "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ":\n"); - - ++psContext->indent; - break; - } - case OPCODE_EQ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EQ\n"); -#endif - AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); - break; - } - case OPCODE_USHR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//USHR\n"); -#endif - CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_ISHL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHL\n"); -#endif - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp("<<", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_ISHR: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHR\n"); -#endif - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp(">>", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_LD: - case OPCODE_LD_MS: - { - const ResourceBinding* psBinding = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_LD) - bcatcstr(glsl, "//LD\n"); - else - bcatcstr(glsl, "//LD_MS\n"); -#endif - - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); - - TranslateTexelFetch(psInst, psBinding, glsl); - break; - } - case OPCODE_DISCARD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DISCARD\n"); -#endif - psContext->AddIndentation(); - if 
(psContext->psShader->ui32MajorVersion <= 3) - { - bcatcstr(glsl, "if(any(lessThan(("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_NONE); - - if (psContext->psShader->ui32MajorVersion == 1) - { - /* SM1.X only kills based on the rgb channels */ - bcatcstr(glsl, ").xyz, vec3(0)))){discard;}\n"); - } - else - { - bcatcstr(glsl, "), vec4(0)))){discard;}\n"); - } - } - else if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - { - bcatcstr(glsl, "if(("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")==0){discard;}\n"); - } - else - { - ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - bcatcstr(glsl, "if(("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")!=0){discard;}\n"); - } - break; - } - case OPCODE_LOD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOD\n"); -#endif - //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, &numParenthesis); - - //If the core language does not have query-lod feature, - //then the extension is used. The name of the function - //changed between extension and core. - if (HaveQueryLod(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "textureQueryLod("); - } - else - { - bcatcstr(glsl, "textureQueryLOD("); - } - - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ","); - TranslateTexCoord( - psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], - &psInst->asOperands[1]); - bcatcstr(glsl, ")"); - - //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. - - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. 
- psInst->asOperands[2].iWriteMaskEnabled = 1; - TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_EVAL_CENTROID: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_CENTROID\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtCentroid("); - //interpolateAtCentroid accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SAMPLE_INDEX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtSample("); - //interpolateAtSample accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SNAPPED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SNAPPED\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtOffset("); - //interpolateAtOffset accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. 
- TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ".xy);\n"); - break; - } - case OPCODE_LD_STRUCTURED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_STRUCTURED\n"); -#endif - TranslateShaderStorageLoad(psInst); - break; - } - case OPCODE_LD_UAV_TYPED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); -#endif - Operand* psDest = &psInst->asOperands[0]; - Operand* psSrc = &psInst->asOperands[2]; - Operand* psSrcAddr = &psInst->asOperands[1]; - - int srcCount = psSrc->GetNumSwizzleElements(); - int numParenthesis = 0; - uint32_t compMask = 0; - - switch (psInst->eResDim) - { - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - compMask |= (1 << 2); - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - compMask |= (1 << 1); - case RESOURCE_DIMENSION_TEXTURE1D: - case RESOURCE_DIMENSION_BUFFER: - compMask |= 1; - break; - default: - ASSERT(0); - break; - } - - SHADER_VARIABLE_TYPE srcDataType = SVT_FLOAT; - const ResourceBinding* psBinding = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psBinding); - switch (psBinding->ui32ReturnType) - { - case RETURN_TYPE_FLOAT: - srcDataType = SVT_FLOAT; - break; - case RETURN_TYPE_SINT: - srcDataType = SVT_INT; - break; - case RETURN_TYPE_UINT: - srcDataType = SVT_UINT; - break; - default: - ASSERT(0); - // Suppress uninitialised variable warning - srcDataType = SVT_VOID; - break; - } - - psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); - bcatcstr(glsl, "imageLoad("); - TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ", 
"); - TranslateOperand(psSrcAddr, TO_FLAG_INTEGER, compMask); - bcatcstr(glsl, ")"); - TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_STORE_RAW: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_RAW\n"); -#endif - TranslateShaderStorageStore(psInst); - break; - } - case OPCODE_STORE_STRUCTURED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_STRUCTURED\n"); -#endif - TranslateShaderStorageStore(psInst); - break; - } - - case OPCODE_STORE_UAV_TYPED: - { - const ResourceBinding* psRes; - int foundResource; - uint32_t flags = TO_FLAG_INTEGER; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_UAV_TYPED\n"); -#endif - psContext->AddIndentation(); - - foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, - psInst->asOperands[0].ui32RegisterNumber, - &psRes); - - ASSERT(foundResource); - - bcatcstr(glsl, "imageStore("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ", "); - - switch (psRes->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - flags |= TO_AUTO_EXPAND_TO_VEC4; - 
break; - default: - ASSERT(0); - break; - }; - - TranslateOperand(&psInst->asOperands[1], flags, opMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); - bformata(glsl, ");\n"); - - break; - } - case OPCODE_LD_RAW: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_RAW\n"); -#endif - - TranslateShaderStorageLoad(psInst); - break; - } - - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - { - TranslateAtomicMemOp(psInst); - break; - } - case OPCODE_UBFE: - case OPCODE_IBFE: - { - int numParenthesis = 0; - int i; - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - SHADER_VARIABLE_TYPE dataType = psInst->eOpcode == OPCODE_UBFE ? SVT_UINT : SVT_INT; - uint32_t flags = psInst->eOpcode == OPCODE_UBFE ? 
TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_UBFE) - bcatcstr(glsl, "//OPCODE_UBFE\n"); - else - bcatcstr(glsl, "//OPCODE_IBFE\n"); -#endif - // Need to open this up, GLSL bitfieldextract uses same offset and width for all components - for (i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], dataType, 1, &numParenthesis); - - bcatcstr(glsl, "bitfieldExtract("); - TranslateOperand(&psInst->asOperands[3], flags, (1 << i)); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_INT, (1 << i)); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_INT, (1 << i)); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - - } - break; - } - case OPCODE_RCP: - { - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RCP\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); - bcatcstr(glsl, "(1.0) / "); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); - bcatcstr(glsl, "("); - numParenthesis++; - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_F32TOF16: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); -#endif - - for (int i = 
0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - - bcatcstr(glsl, "packHalf2x16(vec2("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); - bcatcstr(glsl, ", 0.0))"); - AddAssignPrologue(numParenthesis); - - } - break; - } - case OPCODE_F16TOF32: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); -#endif - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); - - bcatcstr(glsl, "unpackHalf2x16("); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); - bcatcstr(glsl, ").x"); - AddAssignPrologue(numParenthesis); - } - break; - - - } - case OPCODE_INEG: - { - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INEG\n"); -#endif - //dest = 0 - src0 - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); - - bcatcstr(glsl, "0 - "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTX\n"); -#endif - CallHelper1("dFdx", psInst, 0, 1, 1); - break; - } - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_DERIV_RTY: - { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTY\n"); -#endif - CallHelper1("dFdy", psInst, 0, 1, 1); - break; - } - case OPCODE_LRP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LRP\n"); -#endif - CallHelper3("mix", psInst, 0, 2, 3, 1, 1); - break; - } - case OPCODE_DP2ADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2ADD\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = dot(vec2("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, "), vec2("); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ")) + "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_NONE); - bcatcstr(glsl, ";\n"); - break; - } - case OPCODE_POW: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//POW\n"); -#endif - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = pow(abs("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, "), "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ");\n"); - break; - } - - case OPCODE_IMM_ATOMIC_ALLOC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, "atomicAdd("); - else - bcatcstr(glsl, "atomicCounterIncrement("); - ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); - bformata(glsl, "_counter"); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, ", 1u)"); - else - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_IMM_ATOMIC_CONSUME: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); -#endif - 
psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, "(atomicAdd("); - else - bcatcstr(glsl, "atomicCounterDecrement("); - ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); - bformata(glsl, "_counter"); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)"); - else - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_NOT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//NOT\n"); -#endif - if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) - { - UseExtraFunctionDependency("op_not"); - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); - bcatcstr(glsl, "op_not("); - numParenthesis++; - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - } - else - { - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); - - bcatcstr(glsl, "~"); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_XOR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//XOR\n"); -#endif - CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_RESINFO: - { - - uint32_t destElem; - uint32_t mask = psInst->asOperands[0].GetAccessMask(); - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RESINFO\n"); -#endif - - for (destElem = 0; destElem < 4; ++destElem) - { - if (1 << destElem & mask) - GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); - } - - break; - } - case OPCODE_BUFINFO: - { -#ifdef _DEBUG - 
psContext->AddIndentation(); - bcatcstr(glsl, "//BUFINFO\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, &numParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, "_buf.length()"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_SAMPLE_INFO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_INFO\n"); -#endif - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, 1, &numParenthesis); - bcatcstr(glsl, "textureSamples("); - std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[1].ui32RegisterNumber, 0); - if (psContext->IsVulkan()) - { - std::string vulkanSamplerName = GetVulkanDummySamplerName(); - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); - std::ostringstream oss; - oss << smpType; - oss << "(" << texName << ", " << vulkanSamplerName << ")"; - texName = oss.str(); - } - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - case OPCODE_DDIV: - case OPCODE_DFMA: - case OPCODE_DRCP: - case OPCODE_MSAD: - case OPCODE_DTOI: - case OPCODE_DTOU: - case OPCODE_ITOD: - case OPCODE_UTOD: - default: - { - ASSERT(0); - break; - } - } - - if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) - { - int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - - const bool 
workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; - - if (workaroundAdrenoBugs) - bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); - - for (int i = workaroundAdrenoBugs ? 0 : 1; i < 2; ++i) - { - const bool generateWorkaround = (i == 0); - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, &numParenthesis); - bcatcstr(glsl, generateWorkaround ? "min(max(" : "clamp("); - TranslateOperand(&psInst->asOperands[0], TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, generateWorkaround ? ", 0.0), 1.0)" : ", 0.0, 1.0)"); - AddAssignPrologue(numParenthesis); - - if (generateWorkaround) - bcatcstr(glsl, "#else\n"); - } - - if (workaroundAdrenoBugs) - bcatcstr(glsl, "#endif\n"); - } + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + psContext->AddIndentation(); + bcatcstr(glsl, "return;\n"); + break; + } + case OPCODE_INTERFACE_CALL: + { + const char* name; + ShaderVar* psVar; + uint32_t varFound; + + uint32_t funcPointer; + uint32_t funcBodyIndex; + uint32_t ui32NumBodiesPerTable; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INTERFACE_CALL\n"); +#endif + + ASSERT(psInst->asOperands[0].eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32); + + funcPointer = psInst->asOperands[0].aui32ArraySizes[0]; + funcBodyIndex = psInst->ui32FuncIndexWithinInterface; + + ui32NumBodiesPerTable = psContext->psShader->funcPointer[funcPointer].ui32NumBodiesPerTable; + + varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(funcPointer, &psVar); + + ASSERT(varFound); + + name = &psVar->name[0]; + + psContext->AddIndentation(); + bcatcstr(glsl, name); + TranslateOperandIndexMAD(&psInst->asOperands[0], 1, ui32NumBodiesPerTable, funcBodyIndex); + //bformata(glsl, "[%d]", 
funcBodyIndex); + bcatcstr(glsl, "();\n"); + break; + } + case OPCODE_LABEL: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LABEL\n"); +#endif + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); //Closing brace ends the previous function. + psContext->AddIndentation(); + + bcatcstr(glsl, "subroutine(SubroutineType)\n"); + bcatcstr(glsl, "void "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, "(){\n"); + ++psContext->indent; + break; + } + case OPCODE_COUNTBITS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitCount("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_HI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findMSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_LO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findLSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_SHI: //signed high + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findMSB("); + TranslateOperand(&psInst->asOperands[1], 
TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFREV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitfieldReverse("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFI: + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t numelements_width = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t numelements_offset = psInst->asOperands[2].GetNumSwizzleElements(); + uint32_t numelements_dest = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); + uint32_t i, j, k; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); +#endif + if (psContext->psShader->eTargetLanguage == LANG_ES_300) + UseExtraFunctionDependency("int_bitfieldInsert"); + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, &numParenthesis); + + if (numoverall_elements == 1) + bformata(glsl, "int("); + else + bformata(glsl, "ivec%d(", numoverall_elements); + + k = 0; + for (i = 0; i < 4; ++i) + { + if ((destMask & (1 << i)) == 0) + continue; + + k++; + if (psContext->psShader->eTargetLanguage == LANG_ES_300) + bcatcstr(glsl, "int_bitfieldInsert("); + else + bcatcstr(glsl, "bitfieldInsert("); + + for (j = 4; j >= 1; --j) + { + TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER, 1 << i); + if (j != 1) + bcatcstr(glsl, ","); + } + + bcatcstr(glsl, ") "); + if (k != numoverall_elements) + bcatcstr(glsl, ", "); + } + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_CUT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, 
"EndPrimitive();\n"); + break; + } + case OPCODE_EMIT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + + psContext->AddIndentation(); + bcatcstr(glsl, "EmitVertex();\n"); + break; + } + case OPCODE_EMITTHENCUT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "EmitVertex();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + break; + } + + case OPCODE_CUT_STREAM: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT_STREAM\n"); +#endif + psContext->AddIndentation(); + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. 
+ bcatcstr(glsl, "EndPrimitive();\n"); + } + else + { + bcatcstr(glsl, "EndStreamPrimitive("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + + break; + } + case OPCODE_EMIT_STREAM: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT_STREAM\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + + psContext->AddIndentation(); + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. + bcatcstr(glsl, "EmitVertex();\n"); + } + else + { + bcatcstr(glsl, "EmitStreamVertex("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + break; + } + case OPCODE_EMITTHENCUT_STREAM: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); +#endif + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. 
+ bcatcstr(glsl, "EmitVertex();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + } + else + { + bcatcstr(glsl, "EmitStreamVertex("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndStreamPrimitive("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + break; + } + case OPCODE_REP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//REP\n"); +#endif + //Need to handle nesting. + //Max of 4 for rep - 'Flow Control Limitations' http://msdn.microsoft.com/en-us/library/windows/desktop/bb219848(v=vs.85).aspx + + psContext->AddIndentation(); + bcatcstr(glsl, "RepCounter = "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ";\n"); + + psContext->AddIndentation(); + bcatcstr(glsl, "while(RepCounter!=0){\n"); + ++psContext->indent; + break; + } + case OPCODE_ENDREP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDREP\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "RepCounter--;\n"); + + --psContext->indent; + + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_LOOP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); +#endif + psContext->AddIndentation(); + + if (psInst->ui32NumOperands == 2) + { + //DX9 version + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_SPECIAL_LOOPCOUNTER); + bcatcstr(glsl, "for("); + bcatcstr(glsl, "LoopCounter = "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".y, ZeroBasedCounter = 0;"); + bcatcstr(glsl, "ZeroBasedCounter < "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".x;"); + + bcatcstr(glsl, "LoopCounter += "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".z, ZeroBasedCounter++){\n"); + ++psContext->indent; + 
} + else if (psInst->m_LoopInductors[1] != 0) + { + // Can emit as for + uint32_t typeFlags = TO_FLAG_INTEGER; + bcatcstr(glsl, "for("); + if (psInst->m_LoopInductors[0] != 0) + { + if (psInst->m_InductorRegister != 0) + { + // Do declaration here as well + switch (psInst->m_LoopInductors[0]->asOperands[0].GetDataType(psContext)) + { + case SVT_INT: + bcatcstr(glsl, "int "); + break; + case SVT_UINT: + bcatcstr(glsl, "uint "); + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + default: + ASSERT(0); + break; + } + } + TranslateInstruction(psInst->m_LoopInductors[0], true); + } + bcatcstr(glsl, " ; "); + bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType != INSTRUCTION_TEST_NONZERO; + bool negateOrder = false; + + // Yet Another NVidia OSX shader compiler bug workaround (really nvidia, get your s#!t together): + // For reasons unfathomable to us, this breaks SSAO effect on OSX (case 756028) + // Broken: for(int ti_loop_1 = int(int(0xFFFFFFFCu)) ; 4 >= ti_loop_1 ; ti_loop_1++) + // Works: for (int ti_loop_1 = int(int(0xFFFFFFFCu)); ti_loop_1 <= 4; ti_loop_1++) + // + // So, check if the first argument is an immediate value, and if so, switch the order or the operands + // (and adjust condition) + if (psInst->m_LoopInductors[1]->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + negateOrder = true; + + const char *cmpOp = ""; + switch (psInst->m_LoopInductors[1]->eOpcode) + { + case OPCODE_IGE: + if (negateOrder) + cmpOp = negateCondition ? ">" : "<="; + else + cmpOp = negateCondition ? "<" : ">="; + break; + case OPCODE_ILT: + if (negateOrder) + cmpOp = negateCondition ? "<=" : ">"; + else + cmpOp = negateCondition ? ">=" : "<"; + break; + case OPCODE_IEQ: + // No need to change the comparison if negateOrder is true + cmpOp = negateCondition ? 
"!=" : "=="; + if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_INE: + // No need to change the comparison if negateOrder is true + cmpOp = negateCondition ? "==" : "!="; + if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_UGE: + if (negateOrder) + cmpOp = negateCondition ? ">" : "<="; + else + cmpOp = negateCondition ? "<" : ">="; + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_ULT: + if (negateOrder) + cmpOp = negateCondition ? "<=" : ">"; + else + cmpOp = negateCondition ? ">=" : "<"; + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + + default: + ASSERT(0); + } + TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 2 : 1], typeFlags); + bcatcstr(glsl, cmpOp); + TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 1 : 2], typeFlags); + + bcatcstr(glsl, " ; "); + // One more shortcut: translate IADD tX, tX, 1 to tX++ + if (HLSLcc::IsAddOneInstruction(psInst->m_LoopInductors[3])) + { + TranslateOperand(&psInst->m_LoopInductors[3]->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, "++"); + } + else + TranslateInstruction(psInst->m_LoopInductors[3], true); + + bcatcstr(glsl, ")\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + } + else + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + bstring name; + name = bformat(HLSLCC_TEMP_PREFIX "i_while_true_%d", m_NumDeclaredWhileTrueLoops++); + + // Workaround limitation with WebGL 1.0 GLSL, as we're expecting something to break the loop in any case + // Fragment shaders on some devices don't like too large integer constants (Adreno 3xx, for example) + int hardcoded_iteration_limit = (psContext->psShader->eShaderType == PIXEL_SHADER) ? 
0x7FFF : 0x7FFFFFFF; + + bformata(glsl, "for(int %s = 0 ; %s < 0x%X ; %s++){\n", name->data, name->data, hardcoded_iteration_limit, name->data); + bdestroy(name); + } + else + { + bcatcstr(glsl, "while(true){\n"); + } + ++psContext->indent; + } + break; + } + case OPCODE_ENDLOOP: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_BREAK: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "break;\n"); + break; + } + case OPCODE_BREAKC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_CONTINUEC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_IF: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + ++psContext->indent; + break; + } + case OPCODE_RETC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_ELSE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "} else {\n"); + psContext->indent++; + break; + } + case OPCODE_ENDSWITCH: + case OPCODE_ENDIF: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_CONTINUE: + { + psContext->AddIndentation(); + bcatcstr(glsl, "continue;\n"); + break; + } + case 
OPCODE_DEFAULT: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "default:\n"); + ++psContext->indent; + break; + } + case OPCODE_NOP: + { + break; + } + case OPCODE_SYNC: + { + const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); +#endif + + if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) + { + psContext->AddIndentation(); + bcatcstr(glsl, "memoryBarrierShared();\n"); + } + if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) + { + psContext->AddIndentation(); + bcatcstr(glsl, "memoryBarrier();\n"); + } + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) + { + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + } + break; + } + case OPCODE_SWITCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "switch(int("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")){\n"); + + psContext->indent += 2; + break; + } + case OPCODE_CASE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); +#endif + psContext->AddIndentation(); + + bcatcstr(glsl, "case "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); + + ++psContext->indent; + break; + } + case OPCODE_EQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); + break; + } + case OPCODE_USHR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); +#endif + CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_ISHL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + 
} + + CallBinaryOp("<<", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ISHR: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp(">>", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_LD: + case OPCODE_LD_MS: + { + const ResourceBinding* psBinding = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); +#endif + + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); + + TranslateTexelFetch(psInst, psBinding, glsl); + break; + } + case OPCODE_DISCARD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); +#endif + psContext->AddIndentation(); + if (psContext->psShader->ui32MajorVersion <= 3) + { + bcatcstr(glsl, "if(any(lessThan(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_NONE); + + if (psContext->psShader->ui32MajorVersion == 1) + { + /* SM1.X only kills based on the rgb channels */ + bcatcstr(glsl, ").xyz, vec3(0)))){discard;}\n"); + } + else + { + bcatcstr(glsl, "), vec4(0)))){discard;}\n"); + } + } + else if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")==0){discard;}\n"); + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")!=0){discard;}\n"); + } + break; + } + case OPCODE_LOD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); +#endif + //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 
4, &numParenthesis); + + //If the core language does not have query-lod feature, + //then the extension is used. The name of the function + //changed between extension and core. + if (HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "textureQueryLod("); + } + else + { + bcatcstr(glsl, "textureQueryLOD("); + } + + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ","); + TranslateTexCoord( + psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], + &psInst->asOperands[1]); + bcatcstr(glsl, ")"); + + //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. + + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psInst->asOperands[2].iWriteMaskEnabled = 1; + TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_EVAL_CENTROID: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtCentroid("); + //interpolateAtCentroid accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtSample("); + //interpolateAtSample accepts in-qualified variables. 
+ //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SNAPPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtOffset("); + //interpolateAtOffset accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ".xy);\n"); + break; + } + case OPCODE_LD_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); +#endif + TranslateShaderStorageLoad(psInst); + break; + } + case OPCODE_LD_UAV_TYPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); +#endif + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; + + int srcCount = psSrc->GetNumSwizzleElements(); + int numParenthesis = 0; + uint32_t compMask = 0; + + switch (psInst->eResDim) + { + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + compMask |= (1 << 2); + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + compMask |= (1 << 1); + case RESOURCE_DIMENSION_TEXTURE1D: + case RESOURCE_DIMENSION_BUFFER: + compMask |= 1; + break; + 
default: + ASSERT(0); + break; + } + + SHADER_VARIABLE_TYPE srcDataType = SVT_FLOAT; + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psBinding); + switch (psBinding->ui32ReturnType) + { + case RETURN_TYPE_FLOAT: + srcDataType = SVT_FLOAT; + break; + case RETURN_TYPE_SINT: + srcDataType = SVT_INT; + break; + case RETURN_TYPE_UINT: + srcDataType = SVT_UINT; + break; + case RETURN_TYPE_SNORM: + case RETURN_TYPE_UNORM: + srcDataType = SVT_FLOAT; + break; + default: + ASSERT(0); + // Suppress uninitialised variable warning + srcDataType = SVT_VOID; + break; + } + + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + bcatcstr(glsl, "imageLoad("); + TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ", "); + TranslateOperand(psSrcAddr, TO_FLAG_INTEGER, compMask); + bcatcstr(glsl, ")"); + TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_STORE_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + case OPCODE_STORE_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + + case OPCODE_STORE_UAV_TYPED: + { + const ResourceBinding* psRes; + int foundResource; + uint32_t flags = TO_FLAG_INTEGER; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); +#endif + psContext->AddIndentation(); + + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + + ASSERT(foundResource); + + bcatcstr(glsl, "imageStore("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ", "); + 
+ switch (psRes->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + default: + ASSERT(0); + break; + } + + TranslateOperand(&psInst->asOperands[1], flags, opMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); + bformata(glsl, ");\n"); + + break; + } + case OPCODE_LD_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); +#endif + + TranslateShaderStorageLoad(psInst); + break; + } + + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + { + TranslateAtomicMemOp(psInst); + break; + } + case OPCODE_UBFE: + case OPCODE_IBFE: + { + int numParenthesis = 0; + int i; + uint32_t writeMask = 
psInst->asOperands[0].GetAccessMask(); + SHADER_VARIABLE_TYPE dataType = psInst->eOpcode == OPCODE_UBFE ? SVT_UINT : SVT_INT; + uint32_t flags = psInst->eOpcode == OPCODE_UBFE ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); +#endif + // Need to open this up, GLSL bitfieldextract uses same offset and width for all components + for (i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], dataType, 1, &numParenthesis); + + bcatcstr(glsl, "bitfieldExtract("); + TranslateOperand(&psInst->asOperands[3], flags, (1 << i)); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_INT, (1 << i)); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_INT, (1 << i)); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_RCP: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, "(1.0) / "); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, "("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case 
OPCODE_F32TOF16: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); +#endif + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + + bcatcstr(glsl, "packHalf2x16(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); + bcatcstr(glsl, ", 0.0))"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_F16TOF32: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); +#endif + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + + bcatcstr(glsl, "unpackHalf2x16("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); + bcatcstr(glsl, ").x"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_INEG: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); +#endif + //dest = 0 - src0 + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "0 - "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, 
"//DERIV_RTX\n"); +#endif + CallHelper1("dFdx", psInst, 0, 1, 1); + break; + } + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTY: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); +#endif + CallHelper1("dFdy", psInst, 0, 1, 1); + break; + } + case OPCODE_LRP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); +#endif + CallHelper3("mix", psInst, 0, 2, 3, 1, 1); + break; + } + case OPCODE_DP2ADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = dot(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), vec2("); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ")) + "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case OPCODE_POW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); +#endif + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = pow(abs("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ");\n"); + break; + } + + case OPCODE_IMM_ATOMIC_ALLOC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, "atomicAdd("); + else + bcatcstr(glsl, "atomicCounterIncrement("); + ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); + bformata(glsl, "_counter"); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, ", 1u)"); + else + bcatcstr(glsl, ")"); + 
AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_IMM_ATOMIC_CONSUME: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, "(atomicAdd("); + else + bcatcstr(glsl, "atomicCounterDecrement("); + ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); + bformata(glsl, "_counter"); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)"); + else + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_NOT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); +#endif + // Adreno 3xx fails on ~a with "Internal compiler error: unexpected operator", use op_not instead + if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage) || psContext->psShader->eTargetLanguage == LANG_ES_300) + { + UseExtraFunctionDependency("op_not"); + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + bcatcstr(glsl, "op_not("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + } + else + { + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "~"); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); +#endif + CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_RESINFO: + { + uint32_t destElem; + uint32_t mask = 
psInst->asOperands[0].GetAccessMask(); + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); +#endif + + for (destElem = 0; destElem < 4; ++destElem) + { + if (1 << destElem & mask) + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + } + + break; + } + case OPCODE_BUFINFO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, &numParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, "_buf.length()"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_SAMPLE_INFO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_INFO\n"); +#endif + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? 
SVT_FLOAT : SVT_UINT, 1, &numParenthesis); + bcatcstr(glsl, "textureSamples("); + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[1].ui32RegisterNumber, 0); + if (psContext->IsVulkan()) + { + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DDIV: + case OPCODE_DFMA: + case OPCODE_DRCP: + case OPCODE_MSAD: + case OPCODE_DTOI: + case OPCODE_DTOU: + case OPCODE_ITOD: + case OPCODE_UTOD: + default: + { + ASSERT(0); + break; + } + } + + if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) + { + int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + + const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); + + for (int i = workaroundAdrenoBugs ? 0 : 1; i < 2; ++i) + { + const bool generateWorkaround = (i == 0); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, &numParenthesis); + bcatcstr(glsl, generateWorkaround ? "min(max(" : "clamp("); + TranslateOperand(&psInst->asOperands[0], TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, generateWorkaround ? 
", 0.0), 1.0)" : ", 0.0, 1.0)"); + AddAssignPrologue(numParenthesis); + + if (generateWorkaround) + bcatcstr(glsl, "#else\n"); + } + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#endif\n"); + } } diff --git a/src/toGLSLOperand.cpp b/src/toGLSLOperand.cpp index 4cb2f03..40f163c 100644 --- a/src/toGLSLOperand.cpp +++ b/src/toGLSLOperand.cpp @@ -31,89 +31,89 @@ static const char *squareBrackets[2][2] = { { "DynamicIndex(", ")" }, { "[", "]" // Returns nonzero if types are just different precisions of the same underlying type static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) { - SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); - if (a == b) - return true; + if (a == b) + return true; - // Special case for array indices: both uint and int are fine - if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && - (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) - return true; + // Special case for array indices: both uint and int are fine + if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && + (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) + return true; - if ((a == SVT_FLOAT || a == SVT_FLOAT16 || a == SVT_FLOAT10) && - (b == SVT_FLOAT || b == SVT_FLOAT16 || b == SVT_FLOAT10)) - return true; + if ((a == SVT_FLOAT || a == SVT_FLOAT16 || a == SVT_FLOAT10) && + (b == SVT_FLOAT || b == SVT_FLOAT16 || b == SVT_FLOAT10)) + return true; - if ((a == SVT_INT || a == SVT_INT16 || a == SVT_INT12) && - (b == SVT_INT || b == SVT_INT16 || a == SVT_INT12)) - return true; + if ((a == SVT_INT || a == SVT_INT16 || a == SVT_INT12) && + (b == SVT_INT || b == SVT_INT16 || b == SVT_INT12)) // b, not a: the requested type must itself be an int precision + return true; - if ((a == SVT_UINT || a == SVT_UINT16) && - (b == SVT_UINT || b == SVT_UINT16)) - return true; + if ((a == SVT_UINT || a == SVT_UINT16) && + (b == SVT_UINT || b == SVT_UINT16)) + return true; - return 
false; + return false; } void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase) { - TranslateOperandSwizzleWithMask(psContext, psOperand, OPERAND_4_COMPONENT_MASK_ALL, iRebase); + TranslateOperandSwizzleWithMask(psContext, psOperand, OPERAND_4_COMPONENT_MASK_ALL, iRebase); } void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) { - TranslateOperandSwizzleWithMask(*psContext->currentGLSLString, psContext, psOperand, ui32ComponentMask, iRebase); + TranslateOperandSwizzleWithMask(*psContext->currentGLSLString, psContext, psOperand, ui32ComponentMask, iRebase); } void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) { - uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); - if(psOperand->eType == OPERAND_TYPE_INPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar inputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - } - if (psOperand->eType == OPERAND_TYPE_OUTPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar outputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - 
(psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - } - - if(psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) - { + uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar inputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + } + if (psOperand->eType == OPERAND_TYPE_OUTPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar outputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + } + + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + { 
/*ConstantBuffer* psCBuf = NULL; ShaderVar* psVar = NULL; int32_t index = -1; @@ -130,897 +130,907 @@ void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psC bformata(glsl, "[%d]", index); }*/ - //return; - } - - if(psOperand->iWriteMaskEnabled && - psOperand->iNumComponents != 1) - { - //Component Mask - if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t mask; - if (psOperand->ui32CompMask != 0) - mask = psOperand->ui32CompMask & ui32ComponentMask; - else - mask = ui32ComponentMask; - - if(mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) - { - bcatcstr(glsl, "."); - if(mask & OPERAND_4_COMPONENT_MASK_X) - { - ASSERT(iRebase == 0); - bcatcstr(glsl, "x"); - } - if(mask & OPERAND_4_COMPONENT_MASK_Y) - { - ASSERT(iRebase <= 1); - bformata(glsl, "%c", "xy"[1 - iRebase]); - } - if(mask & OPERAND_4_COMPONENT_MASK_Z) - { - ASSERT(iRebase <= 2); - bformata(glsl, "%c", "xyz"[2 - iRebase]); - } - if(mask & OPERAND_4_COMPONENT_MASK_W) - { - ASSERT(iRebase <= 3); - bformata(glsl, "%c", "xyzw"[3 - iRebase]); - } - } - } - else - //Component Swizzle - if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || - !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && - psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && - psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && - psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W - ) - ) - { - uint32_t i; - - bcatcstr(glsl, "."); - - for (i = 0; i < 4; ++i) - { - if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) - continue; - - if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - bcatcstr(glsl, "x"); - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - bformata(glsl, "%c", "xy"[1 - iRebase]); - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - bformata(glsl, "%c", "xyz"[2 - 
iRebase]); - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - bformata(glsl, "%c", "xyzw"[3 - iRebase]); - } - } - } - } - else - if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case - { - bcatcstr(glsl, "."); - - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - bcatcstr(glsl, "x"); - } - else - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - bformata(glsl, "%c", "xy"[1 - iRebase]); - } - else - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - bformata(glsl, "%c", "xyz"[2 - iRebase]); - } - else - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - bformata(glsl, "%c", "xyzw"[3 - iRebase]); - } - } - - //Component Select 1 - } + //return; + } + + if (psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask; + if (psOperand->ui32CompMask != 0) + mask = psOperand->ui32CompMask & ui32ComponentMask; + else + mask = ui32ComponentMask; + + if (mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) + { + bcatcstr(glsl, "."); + if (mask & OPERAND_4_COMPONENT_MASK_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + if (mask & OPERAND_4_COMPONENT_MASK_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + if (mask & OPERAND_4_COMPONENT_MASK_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + if (mask & OPERAND_4_COMPONENT_MASK_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + } + else + //Component Swizzle + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || + !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && + psOperand->aui32Swizzle[1] == 
OPERAND_4_COMPONENT_Y && + psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && + psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W + ) + ) + { + uint32_t i; + + bcatcstr(glsl, "."); + + for (i = 0; i < 4; ++i) + { + if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) + continue; + + if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + } + } + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case + { + bcatcstr(glsl, "."); + + if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + + //Component Select 1 + } } void ToGLSL::TranslateOperandIndex(const Operand* psOperand, int index) { - int i = index; - - bstring glsl = *psContext->currentGLSLString; - - ASSERT(index < psOperand->iIndexDims); - - switch(psOperand->eIndexRep[i]) - { - case OPERAND_INDEX_IMMEDIATE32: - { - bformata(glsl, "[%d]", psOperand->aui32ArraySizes[i]); - break; - } - case OPERAND_INDEX_RELATIVE: - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[i].get(), 
TO_FLAG_INTEGER); - bcatcstr(glsl, "]"); - break; - } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - bcatcstr(glsl, "["); //Indexes must be integral. - TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); - bformata(glsl, " + %d]", psOperand->aui32ArraySizes[i]); - break; - } - default: - { - break; - } - } + int i = index; + + bstring glsl = *psContext->currentGLSLString; + + ASSERT(index < psOperand->iIndexDims); + + switch (psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + bformata(glsl, "[%d]", psOperand->aui32ArraySizes[i]); + break; + } + case OPERAND_INDEX_RELATIVE: + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + bcatcstr(glsl, "["); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); + bformata(glsl, " + %d]", psOperand->aui32ArraySizes[i]); + break; + } + default: + { + break; + } + } } void ToGLSL::TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add) { - int i = index; - int isGeoShader = psContext->psShader->eShaderType == GEOMETRY_SHADER ? 1 : 0; - - bstring glsl = *psContext->currentGLSLString; - - ASSERT(index < psOperand->iIndexDims); - - switch(psOperand->eIndexRep[i]) - { - case OPERAND_INDEX_IMMEDIATE32: - { - if(i > 0 || isGeoShader) - { - bformata(glsl, "[%d*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); - } - else - { - bformata(glsl, "%d*%d+%d", psOperand->aui32ArraySizes[i], multiply, add); - } - break; - } - case OPERAND_INDEX_RELATIVE: - { - bcatcstr(glsl, "[int("); //Indexes must be integral. - TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); - bformata(glsl, ")*%d+%d]", multiply, add); - break; - } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - bcatcstr(glsl, "[(int("); //Indexes must be integral. 
- TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); - bformata(glsl, ") + %d)*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); - break; - } - default: - { - break; - } - } + int i = index; + int isGeoShader = psContext->psShader->eShaderType == GEOMETRY_SHADER ? 1 : 0; + + bstring glsl = *psContext->currentGLSLString; + + ASSERT(index < psOperand->iIndexDims); + + switch (psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + if (i > 0 || isGeoShader) + { + bformata(glsl, "[%d*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); + } + else + { + bformata(glsl, "%d*%d+%d", psOperand->aui32ArraySizes[i], multiply, add); + } + break; + } + case OPERAND_INDEX_RELATIVE: + { + bcatcstr(glsl, "[int("); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); + bformata(glsl, ")*%d+%d]", multiply, add); + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + bcatcstr(glsl, "[(int("); //Indexes must be integral. 
+ TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); + bformata(glsl, ") + %d)*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); + break; + } + default: + { + break; + } + } } static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) { - if (psContext->psShader->eTargetLanguage == LANG_METAL) - { - std::ostringstream oss; - oss << "as_type<"; - oss << GetConstructorForTypeMetal(to, numComponents); - oss << ">"; - return oss.str(); - } - else - { - if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) - return "intBitsToFloat"; - else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) - return "uintBitsToFloat"; - else if (to == SVT_INT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) - return "floatBitsToInt"; - else if (to == SVT_UINT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) - return "floatBitsToUint"; - } - - ASSERT(0); - return "ERROR missing components in GetBitcastOp()"; + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + std::ostringstream oss; + oss << "as_type<"; + oss << GetConstructorForTypeMetal(to, numComponents); + oss << ">"; + return oss.str(); + } + else + { + if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) + return "intBitsToFloat"; + else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) + return "uintBitsToFloat"; + else if (to == SVT_INT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) + return "floatBitsToInt"; + else if (to == SVT_UINT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) + return "floatBitsToUint"; + } + + ASSERT(0); + return "ERROR missing components in GetBitcastOp()"; } // Helper function to print out a single 32-bit immediate value in desired format static void 
printImmediate32(HLSLCrossCompilerContext *psContext, uint32_t value, SHADER_VARIABLE_TYPE eType) { - bstring glsl = *psContext->currentGLSLString; - int needsParenthesis = 0; - - // Print floats as bit patterns. - if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage) && fpcheck(*((float *)(&value)))) - { - if (psContext->psShader->eTargetLanguage == LANG_METAL) - bcatcstr(glsl, "as_type("); - else - bcatcstr(glsl, "intBitsToFloat("); - eType = SVT_INT; - needsParenthesis = 1; - } - - switch (eType) - { - default: - ASSERT(0); - case SVT_INT: - case SVT_INT16: - case SVT_INT12: - // Adreno bug (happens only on android 4.* GLES3) casting unsigned representation of negative values to signed int - // results in undefined value/fails to link shader, need to print as signed decimal - if (value > 0x7fffffff && psContext->psShader->eTargetLanguage == LANG_ES_300) - bformata(glsl, "%i", (int32_t)value); - // Need special handling for anything >= uint 0x3fffffff - else if (value > 0x3ffffffe) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bformata(glsl, "int(0x%Xu)", value); - else - bformata(glsl, "0x%X", value); - } - else if(value <= 1024) // Print anything below 1024 as decimal, and hex after that - bformata(glsl, "%d", value); - else - bformata(glsl, "0x%X", value); - break; - case SVT_UINT: - case SVT_UINT16: - // Adreno bug workaround (happens only on pre-lollipop Nexus 4's): '0u' is treated as int. 
- if (value == 0 && psContext->psShader->eTargetLanguage == LANG_ES_300) - bcatcstr(glsl, "uint(0u)"); - else - bformata(glsl, "%uu", value); - break; - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - HLSLcc::PrintFloat(glsl, *((float *)(&value))); - break; - case SVT_BOOL: - if (value == 0) - bcatcstr(glsl, "false"); - else - bcatcstr(glsl, "true"); - } - if (needsParenthesis) - bcatcstr(glsl, ")"); + bstring glsl = *psContext->currentGLSLString; + int needsParenthesis = 0; + + // Print floats as bit patterns. + if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage) && fpcheck(*((float *)(&value)))) + { + if (psContext->psShader->eTargetLanguage == LANG_METAL) + bcatcstr(glsl, "as_type("); + else + bcatcstr(glsl, "intBitsToFloat("); + eType = SVT_INT; + needsParenthesis = 1; + } + + switch (eType) + { + default: + ASSERT(0); + case SVT_INT: + case SVT_INT16: + case SVT_INT12: + if (value > 0x3ffffffe) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bformata(glsl, "int(0x%Xu)", value); + else + bformata(glsl, "0x%X", value); + } + else if (value <= 1024) // Print anything below 1024 as decimal, and hex after that + bformata(glsl, "%d", value); + else + bformata(glsl, "0x%X", value); + break; + case SVT_UINT: + case SVT_UINT16: + // Adreno bug workaround (happens only on pre-lollipop Nexus 4's): '0u' is treated as int. 
+ if (value == 0 && psContext->psShader->eTargetLanguage == LANG_ES_300) + bcatcstr(glsl, "uint(0u)"); + else + bformata(glsl, "%uu", value); + break; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + HLSLcc::PrintFloat(glsl, *((float *)(&value))); + break; + case SVT_BOOL: + if (value == 0) + bcatcstr(glsl, "false"); + else + bcatcstr(glsl, "true"); + } + if (needsParenthesis) + bcatcstr(glsl, ")"); } void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) { - TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase); + TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase); } void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType) { - DeclareDynamicIndexWrapper(psType->name.c_str(), psType->Class, psType->Type, psType->Rows, psType->Columns, psType->Elements); + DeclareDynamicIndexWrapper(psType->name.c_str(), psType->Class, psType->Type, psType->Rows, psType->Columns, psType->Elements); } void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements) { - bstring glsl = psContext->beforeMain; - - const char* suffix = "DynamicIndex"; - const uint32_t maxElemCount = 256; - uint32_t elemCount = ui32Elements; - - if (m_FunctionDefinitions.find(psName) != m_FunctionDefinitions.end()) - return; - - // Add a simple define that one can search and replace on devices that support dynamic indexing the usual way - if (m_FunctionDefinitions.find(suffix) == m_FunctionDefinitions.end()) - { - m_FunctionDefinitions.insert(std::make_pair(suffix, "#define UNITY_DYNAMIC_INDEX_ES2 0\n")); - } - - bcatcstr(glsl, "\n"); - - if (eClass == SVC_STRUCT) - { - bformata(glsl, "%s_Type %s%s", psName, 
psName, suffix); - } - else if(eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) - { - if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - { - // Translate matrices into vec4 arrays - bformata(glsl, "%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s%s", HLSLcc::GetConstructorForType(psContext, eType, 4), ui32Rows, ui32Columns, psName, suffix); - elemCount = (eClass == SVC_MATRIX_COLUMNS ? ui32Columns : ui32Rows); - if (ui32Elements > 1) - { - elemCount *= ui32Elements; - } - } - else - { - bformata(glsl, "%s %s%s", HLSLcc::GetMatrixTypeName(psContext, eType, ui32Columns, ui32Rows).c_str(), psName, suffix); - } - } - else if (eClass == SVC_VECTOR && ui32Columns > 1) - { - bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, ui32Columns), psName, suffix); - } - else if ((eClass == SVC_SCALAR) || (eClass == SVC_VECTOR && ui32Columns == 1)) - { - bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, 1), psName, suffix); - } - bformata(glsl, "(int i){\n"); - bcatcstr(glsl, "#if UNITY_DYNAMIC_INDEX_ES2\n"); - bformata(glsl, " return %s[i];\n", psName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "#define d_ar %s\n", psName); - bformata(glsl, " if (i <= 0) return d_ar[0];"); - - // Let's draw a line somewhere with this workaround - for (int i = 1; i < std::min(elemCount, maxElemCount); i++) { - bformata(glsl, " else if (i == %d) return d_ar[%d];", i, i); - } - bformata(glsl, "\n return d_ar[0];\n"); - bformata(glsl, "#undef d_ar\n"); - bcatcstr(glsl, "#endif\n"); - bformata(glsl, "}\n\n"); - m_FunctionDefinitions.insert(std::make_pair(psName, "")); + bstring glsl = psContext->beforeMain; + + const char* suffix = "DynamicIndex"; + const uint32_t maxElemCount = 256; + uint32_t elemCount = ui32Elements; + + if (m_FunctionDefinitions.find(psName) != m_FunctionDefinitions.end()) + return; + + // Add a simple define that one can search and replace on devices that support dynamic indexing the usual way + if 
(m_FunctionDefinitions.find(suffix) == m_FunctionDefinitions.end()) + { + m_FunctionDefinitions.insert(std::make_pair(suffix, "#define UNITY_DYNAMIC_INDEX_ES2 0\n")); + m_FunctionDefinitionsOrder.push_back(suffix); + } + + bcatcstr(glsl, "\n"); + + char name[256]; + if ((eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) && psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + sprintf(name, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", ui32Rows, ui32Columns, psName); + else + memcpy(name, psName, strlen(psName) + 1); + + if (eClass == SVC_STRUCT) + { + bformata(glsl, "%s_Type %s%s", psName, psName, suffix); + } + else if (eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) + { + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + bformata(glsl, "%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s%s", HLSLcc::GetConstructorForType(psContext, eType, 4), ui32Rows, ui32Columns, psName, suffix); + elemCount = (eClass == SVC_MATRIX_COLUMNS ? 
ui32Columns : ui32Rows); + if (ui32Elements > 1) + { + elemCount *= ui32Elements; + } + } + else + { + bformata(glsl, "%s %s%s", HLSLcc::GetMatrixTypeName(psContext, eType, ui32Columns, ui32Rows).c_str(), psName, suffix); + } + } + else if (eClass == SVC_VECTOR && ui32Columns > 1) + { + bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, ui32Columns), psName, suffix); + } + else if ((eClass == SVC_SCALAR) || (eClass == SVC_VECTOR && ui32Columns == 1)) + { + bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, 1), psName, suffix); + } + bformata(glsl, "(int i){\n"); + bcatcstr(glsl, "#if UNITY_DYNAMIC_INDEX_ES2\n"); + bformata(glsl, " return %s[i];\n", name); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define d_ar %s\n", name); + bformata(glsl, " if (i <= 0) return d_ar[0];"); + + // Let's draw a line somewhere with this workaround + for (int i = 1; i < std::min(elemCount, maxElemCount); i++) + { + bformata(glsl, " else if (i == %d) return d_ar[%d];", i, i); + } + bformata(glsl, "\n return d_ar[0];\n"); + bformata(glsl, "#undef d_ar\n"); + bcatcstr(glsl, "#endif\n"); + bformata(glsl, "}\n\n"); + m_FunctionDefinitions.insert(std::make_pair(psName, "")); + m_FunctionDefinitionsOrder.push_back(psName); } void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) { - int numParenthesis = 0; - int hasCtor = 0; - int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them - SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); - SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); - int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); - int requestedComponents = 0; - int scalarWithSwizzle = 0; - - *pui32IgnoreSwizzle = 0; - - if (psOperand->eType == OPERAND_TYPE_TEMP) - { - // Check for scalar - if 
(psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - scalarWithSwizzle = 1; // Going to need a constructor - } - } - - if (psOperand->eType == OPERAND_TYPE_INPUT) - { - // Check for scalar - // You would think checking would be easy but there is a caveat: - // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved - // as an example consider we have input: - // float2 x; float y; - // and later on we do - // tex2D(xxx, fixed2(x.x, y)); - // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" - // so we may end up with treating it as scalar (even though it is vector now) - const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; - const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; - - const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; - if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) - { - scalarWithSwizzle = 1; - *pui32IgnoreSwizzle = 1; - } - } - - if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && psOperand->IsSwizzleReplicated()) - { - // Needs scalar check as well - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t rebase = 0; - bool isArray; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - if (psVarType->Columns == 1) - { - scalarWithSwizzle = 
1; // Needs a constructor - *pui32IgnoreSwizzle = 1; - } - - } - - if (piRebase) - *piRebase = 0; - - if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) - requestedComponents = 2; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) - requestedComponents = 3; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) - requestedComponents = 4; - - requestedComponents = std::max(requestedComponents, numComponents); - - if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) - { - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - // Mark the operand type to match whatever we're asking for in the flags. - ((Operand *)psOperand)->aeDataType[0] = requestedType; - ((Operand *)psOperand)->aeDataType[1] = requestedType; - ((Operand *)psOperand)->aeDataType[2] = requestedType; - ((Operand *)psOperand)->aeDataType[3] = requestedType; - } - - if (AreTypesCompatible(eType, ui32TOFlag) == 0) - { - if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - { - bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); - numParenthesis++; - hasCtor = 1; - if (eType == SVT_BOOL) - needsBoolUpscale = 1; - } - else - { - // Direct cast not possible, need to do bitcast. - bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents).c_str()); - numParenthesis++; - } - } - - // Add ctor if needed (upscaling). 
Type conversion is already handled above, so here we must - // use the original type to not make type conflicts in bitcasts - if (((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0)) - { -// ASSERT(numComponents == 1); - bformata(glsl, "%s(", GetConstructorForType(psContext, eType, requestedComponents, false)); - numParenthesis++; - hasCtor = 1; - } - } - - - switch(psOperand->eType) - { - case OPERAND_TYPE_IMMEDIATE32: - { - if(psOperand->iNumComponents == 1) - { - printImmediate32(psContext, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); - } - else - { - int i; - int firstItemAdded = 0; - if (hasCtor == 0) - { - bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); - numParenthesis++; - hasCtor = 1; - } - for (i = 0; i < 4; i++) - { - uint32_t uval; - if (!(ui32CompMask & (1 << i))) - continue; - - if (firstItemAdded) - bcatcstr(glsl, ", "); - uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents-1 : i])); - printImmediate32(psContext, uval, requestedType); - firstItemAdded = 1; - } - bcatcstr(glsl, ")"); - *pui32IgnoreSwizzle = 1; - numParenthesis--; - } - break; - } - case OPERAND_TYPE_IMMEDIATE64: - { - if(psOperand->iNumComponents == 1) - { - bformata(glsl, "%.17g", - psOperand->adImmediates[0]); - } - else - { - bformata(glsl, "dvec4(%.17g, %.17g, %.17g, %.17g)", - psOperand->adImmediates[0], - psOperand->adImmediates[1], - psOperand->adImmediates[2], - psOperand->adImmediates[3]); - if(psOperand->iNumComponents != 4) - { - AddSwizzleUsingElementCount(glsl, psOperand->iNumComponents); - } - } - break; - } - case OPERAND_TYPE_INPUT: - { - int regSpace = psOperand->GetRegisterSpace(psContext); - switch(psOperand->iIndexDims) - { - case INDEX_2D: - { - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - - if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) - { - bcatcstr(glsl, "gl_in"); - TranslateOperandIndex(psOperand, 0);//Vertex index - bcatcstr(glsl, ".gl_Position"); - } - else - { - std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - bformata(glsl, "%s", name.c_str()); - TranslateOperandIndex(psOperand, 0);//Vertex index - } - break; - } - default: - { - if(psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) - { - bformata(glsl, "phase%d_Input%d_%d[", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - bcatcstr(glsl, "]"); - } - else - { - if(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) - { - const uint32_t parentIndex = 
psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; - bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, parentIndex, - psOperand->ui32RegisterNumber - parentIndex); - } - else - { - std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - // Rewrite the variable name if we're using framebuffer fetch - if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && - psContext->psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) - { - // With ES2, leave separate variable names for input - if (!WriteToFragData(psContext->psShader->eTargetLanguage) && - name.size() == 13 && !strncmp(name.c_str(), "vs_SV_Target", 12)) - bcatcstr(glsl, name.substr(3).c_str()); - else - bcatcstr(glsl, name.c_str()); - } - else - { - bcatcstr(glsl, name.c_str()); - } - } - } - break; - } - } - break; - } - case OPERAND_TYPE_OUTPUT: - { - /*if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) - { - int stream = 0; - const char* name = GetDeclaredOutputName(psContext, HULL_SHADER, psOperand, &stream); - bcatcstr(glsl, name); - } - else*/ - { - int stream = 0; - std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); - bcatcstr(glsl, name.c_str()); - if (psOperand->m_SubOperands[0].get()) - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); - bcatcstr(glsl, "]"); - } - } - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH: - if (psContext->psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) - { - bcatcstr(psContext->extensions, "#ifdef GL_EXT_frag_depth\n"); - bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); - bcatcstr(psContext->extensions, "#endif\n"); - } - // fall through - case 
OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - bcatcstr(glsl, "gl_FragDepth"); - break; - } - case OPERAND_TYPE_TEMP: - { - SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); - bcatcstr(glsl, HLSLCC_TEMP_PREFIX); - ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. - switch (eTempType) - { - case SVT_FLOAT: - ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); - if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT16: - ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "16_"); - if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT10: - ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "10_"); - if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT: - ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "i"); - if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT16: - ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "i16_"); - if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT12: - ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "i12_"); - if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT: - 
ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "u"); - if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT16: - ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "u16_"); - if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_DOUBLE: - ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "d"); - if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_BOOL: - ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "b"); - if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - default: - ASSERT(0 && "Should never get here!"); - } - // m_ForLoopInductorName overrides the register number, if available - if (psOperand->m_ForLoopInductorName != 0) - { - bformata(glsl, "_loop_%d", psOperand->m_ForLoopInductorName); - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - } - else - bformata(glsl, "%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_SPECIAL_IMMCONSTINT: - { - bformata(glsl, "IntImmConst%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_SPECIAL_IMMCONST: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: - { - bcatcstr(glsl, "BaseColour"); - break; - } - case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: - { - bcatcstr(glsl, "OffsetColour"); - break; - } - case OPERAND_TYPE_SPECIAL_POSITION: - { - bcatcstr(glsl, "gl_Position"); - break; - } - case OPERAND_TYPE_SPECIAL_FOG: - { - bcatcstr(glsl, "Fog"); - break; - } - 
case OPERAND_TYPE_SPECIAL_POINTSIZE: - { - bcatcstr(glsl, "gl_PointSize"); - break; - } - case OPERAND_TYPE_SPECIAL_ADDRESS: - { - bcatcstr(glsl, "Address"); - break; - } - case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: - { - bcatcstr(glsl, "LoopCounter"); - pui32IgnoreSwizzle[0] = 1; - break; - } - case OPERAND_TYPE_SPECIAL_TEXCOORD: - { - bformata(glsl, "TexCoord%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_CONSTANT_BUFFER: - { - const char* StageName = "VS"; - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t index = -1; - std::vector arrayIndices; - bool isArray = false; - bool isSubpassMS = false; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - - switch(psContext->psShader->eShaderType) - { - case PIXEL_SHADER: - { - StageName = "PS"; - break; - } - case HULL_SHADER: - { - StageName = "HS"; - break; - } - case DOMAIN_SHADER: - { - StageName = "DS"; - break; - } - case GEOMETRY_SHADER: - { - StageName = "GS"; - break; - } - case COMPUTE_SHADER: - { - StageName = "CS"; - break; - } - default: - { - break; - } - } - - if(psCBuf && psCBuf->name == "OVR_multiview") - { - pui32IgnoreSwizzle[0] = 1; - bformata(glsl, "gl_ViewID_OVR"); - break; - } - - - if(ui32TOFlag & TO_FLAG_DECLARATION_NAME) - { - pui32IgnoreSwizzle[0] = 1; - } - - // FIXME: With ES 3.0 the buffer name is often not prepended to variable names - if(((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT)!=HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) && - ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)!=HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)) - { - if(psCBuf) - { - //$Globals. 
- if(psCBuf->name[0] == '$') - { - bformata(glsl, "Globals%s", StageName); - } - else - { - bformata(glsl, "%s%s", psCBuf->name.c_str(), StageName); - } - if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) - { - bcatcstr(glsl, "."); - } - } - else - { - //bformata(glsl, "cb%d", psOperand->aui32ArraySizes[0]); - } - } - - if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) - { - //Work out the variable name. Don't apply swizzle to that variable yet. - int32_t rebase = 0; + int numParenthesis = 0; + int hasCtor = 0; + int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them + SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); + int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); + int requestedComponents = 0; + int scalarWithSwizzle = 0; + + *pui32IgnoreSwizzle = 0; + + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + // Check for scalar + if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; // Going to need a constructor + } + } + + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + // Check for scalar + // You would think checking would be easy but there is a caveat: + // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved + // as an example consider we have input: + // float2 x; float y; + // and later on we do + // tex2D(xxx, fixed2(x.x, y)); + // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" + // so we may end up with treating it as scalar (even though it is vector now) + const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; + const bool 
wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; + + const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; + if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) + { + scalarWithSwizzle = 1; + *pui32IgnoreSwizzle = 1; + } + } + + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && psOperand->IsSwizzleReplicated()) + { + // Needs scalar check as well + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = 0; + bool isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (psVarType->Columns == 1) + { + scalarWithSwizzle = 1; // Needs a constructor + *pui32IgnoreSwizzle = 1; + } + } + + if (piRebase) + *piRebase = 0; + + if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) + requestedComponents = 2; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) + requestedComponents = 3; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) + requestedComponents = 4; + + requestedComponents = std::max(requestedComponents, numComponents); + + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) + { + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + // Mark the operand type to match whatever we're asking for in the flags. 
+ ((Operand *)psOperand)->aeDataType[0] = requestedType; + ((Operand *)psOperand)->aeDataType[1] = requestedType; + ((Operand *)psOperand)->aeDataType[2] = requestedType; + ((Operand *)psOperand)->aeDataType[3] = requestedType; + } + + if (AreTypesCompatible(eType, ui32TOFlag) == 0) + { + if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + hasCtor = 1; + if (eType == SVT_BOOL) + { + needsBoolUpscale = 1; + // make sure to wrap the whole thing in parens so the upscale + // multiply only applies to the bool + bcatcstr(glsl, "("); + numParenthesis++; + } + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + } + else + { + // Direct cast not possible, need to do bitcast. + bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents).c_str()); + numParenthesis++; + } + } + + // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must + // use the original type to not make type conflicts in bitcasts + if (((numComponents < requestedComponents) || (scalarWithSwizzle != 0)) && (hasCtor == 0)) + { +// ASSERT(numComponents == 1); + bformata(glsl, "%s(", GetConstructorForType(psContext, eType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + } + } + + + switch (psOperand->eType) + { + case OPERAND_TYPE_IMMEDIATE32: + { + if (psOperand->iNumComponents == 1) + { + printImmediate32(psContext, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + } + else + { + int i; + int firstItemAdded = 0; + if (hasCtor == 0) + { + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + } + for (i = 0; i < 4; i++) + { + uint32_t uval; + if (!(ui32CompMask & (1 << i))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + uval = *((uint32_t*)(&psOperand->afImmediates[i >= 
psOperand->iNumComponents ? psOperand->iNumComponents - 1 : i])); + printImmediate32(psContext, uval, requestedType); + firstItemAdded = 1; + } + bcatcstr(glsl, ")"); + *pui32IgnoreSwizzle = 1; + numParenthesis--; + } + break; + } + case OPERAND_TYPE_IMMEDIATE64: + { + if (psOperand->iNumComponents == 1) + { + bformata(glsl, "%.17g", + psOperand->adImmediates[0]); + } + else + { + bformata(glsl, "dvec4(%.17g, %.17g, %.17g, %.17g)", + psOperand->adImmediates[0], + psOperand->adImmediates[1], + psOperand->adImmediates[2], + psOperand->adImmediates[3]); + if (psOperand->iNumComponents != 4) + { + AddSwizzleUsingElementCount(glsl, psOperand->iNumComponents); + } + } + break; + } + case OPERAND_TYPE_INPUT: + { + int regSpace = psOperand->GetRegisterSpace(psContext); + switch (psOperand->iIndexDims) + { + case INDEX_2D: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + { + bcatcstr(glsl, "gl_in"); + TranslateOperandIndex(psOperand, 0);//Vertex index + bcatcstr(glsl, ".gl_Position"); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + bformata(glsl, "%s", name.c_str()); + TranslateOperandIndex(psOperand, 0);//Vertex index + } + break; + } + default: + { + if (psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + bformata(glsl, "phase%d_Input%d_%d[", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + } + else + { + if (psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) + { + const uint32_t 
parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, parentIndex, + psOperand->ui32RegisterNumber - parentIndex); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + // Rewrite the variable name if we're using framebuffer fetch + if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && + psContext->psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) + { + // With ES2, leave separate variable names for input + if (!WriteToFragData(psContext->psShader->eTargetLanguage) && + name.size() == 13 && !strncmp(name.c_str(), "vs_SV_Target", 12)) + bcatcstr(glsl, name.substr(3).c_str()); + else + bcatcstr(glsl, name.c_str()); + } + else + { + bcatcstr(glsl, name.c_str()); + } + } + } + break; + } + } + break; + } + case OPERAND_TYPE_OUTPUT: + { + /*if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + int stream = 0; + const char* name = GetDeclaredOutputName(psContext, HULL_SHADER, psOperand, &stream); + bcatcstr(glsl, name); + } + else*/ + { + int stream = 0; + std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + bcatcstr(glsl, name.c_str()); + if (psOperand->m_SubOperands[0].get()) + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); + bcatcstr(glsl, "]"); + } + } + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) + { + bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); + } + // fall through + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + bcatcstr(glsl, "gl_FragDepth"); + 
break; + } + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); + + if (psOperand->eSpecialName == NAME_UNDEFINED && psOperand->specialName.length()) + { + bcatcstr(glsl, psOperand->specialName.c_str()); + break; + } + + bcatcstr(glsl, HLSLCC_TEMP_PREFIX); + ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. + switch (eTempType) + { + case SVT_FLOAT: + ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT16: + ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "16_"); + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT10: + ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "10_"); + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT: + ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i"); + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT16: + ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i16_"); + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT12: + ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i12_"); + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + 
break; + case SVT_UINT: + ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "u"); + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT16: + ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "u16_"); + if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_DOUBLE: + ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "d"); + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_BOOL: + ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "b"); + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + default: + ASSERT(0 && "Should never get here!"); + } + // m_ForLoopInductorName overrides the register number, if available + if (psOperand->m_ForLoopInductorName != 0) + { + bformata(glsl, "_loop_%d", psOperand->m_ForLoopInductorName); + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + else + bformata(glsl, "%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONSTINT: + { + bformata(glsl, "IntImmConst%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: + { + bcatcstr(glsl, "BaseColour"); + break; + } + case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: + { + bcatcstr(glsl, "OffsetColour"); + break; + } + case OPERAND_TYPE_SPECIAL_POSITION: + { + bcatcstr(glsl, "gl_Position"); + break; + } + case OPERAND_TYPE_SPECIAL_FOG: + { + 
bcatcstr(glsl, "Fog"); + break; + } + case OPERAND_TYPE_SPECIAL_POINTSIZE: + { + bcatcstr(glsl, "gl_PointSize"); + break; + } + case OPERAND_TYPE_SPECIAL_ADDRESS: + { + bcatcstr(glsl, "Address"); + break; + } + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + { + bcatcstr(glsl, "LoopCounter"); + pui32IgnoreSwizzle[0] = 1; + break; + } + case OPERAND_TYPE_SPECIAL_TEXCOORD: + { + bformata(glsl, "TexCoord%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const char* StageName = "VS"; + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t index = -1; + std::vector arrayIndices; + bool isArray = false; + bool isSubpassMS = false; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + + switch (psContext->psShader->eShaderType) + { + case PIXEL_SHADER: + { + StageName = "PS"; + break; + } + case HULL_SHADER: + { + StageName = "HS"; + break; + } + case DOMAIN_SHADER: + { + StageName = "DS"; + break; + } + case GEOMETRY_SHADER: + { + StageName = "GS"; + break; + } + case COMPUTE_SHADER: + { + StageName = "CS"; + break; + } + default: + { + break; + } + } + + if (psCBuf && psCBuf->name == "OVR_multiview") + { + pui32IgnoreSwizzle[0] = 1; + bformata(glsl, "gl_ViewID_OVR"); + break; + } + + + if (ui32TOFlag & TO_FLAG_DECLARATION_NAME) + { + pui32IgnoreSwizzle[0] = 1; + } + + // FIXME: With ES 3.0 the buffer name is often not prepended to variable names + if (((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) != HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) && + ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) != HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)) + { + if (psCBuf) + { + //$Globals. 
+ if (psCBuf->name[0] == '$') + { + bformata(glsl, "Globals%s", StageName); + } + else + { + bformata(glsl, "%s%s", psCBuf->name.c_str(), StageName); + } + if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + bcatcstr(glsl, "."); + } + } + else + { + //bformata(glsl, "cb%d", psOperand->aui32ArraySizes[0]); + } + } + + if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + //Work out the variable name. Don't apply swizzle to that variable yet. + int32_t rebase = 0; ASSERT(psCBuf != NULL); - uint32_t componentsNeeded = 1; - uint32_t minSwiz = 3; - uint32_t maxSwiz = 0; - if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) - { - int i; - for (i = 0; i < 4; i++) - { - if ((ui32CompMask & (1 << i)) == 0) - continue; - minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); - maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); - } - componentsNeeded = maxSwiz - minSwiz + 1; - } - else - { - minSwiz = maxSwiz = 1; - } - - // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) - // We have to pull down the swizzle array to match the first bit that's actually set - uint32_t tmpSwizzle[4] = { 0 }; - int firstBitSet = 0; - if (ui32CompMask == 0) - ui32CompMask = 0xf; - while ((ui32CompMask & (1 << firstBitSet)) == 0) - firstBitSet++; - std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + uint32_t componentsNeeded = 1; + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + int i; + for (i = 0; i < 4; i++) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + else + { + minSwiz = maxSwiz = 1; + } + + // When we have a component mask that doesn't have .x set 
(this basically only happens when we manually open operands into components) + // We have to pull down the swizzle array to match the first bit that's actually set + uint32_t tmpSwizzle[4] = { 0 }; + int firstBitSet = 0; + if (ui32CompMask == 0) + ui32CompMask = 0xf; + while ((ui32CompMask & (1 << firstBitSet)) == 0) + firstBitSet++; + std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); @@ -1047,100 +1057,100 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan bcstrfree(tmp); bdestroy(dynamicIndex); - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded+minSwiz) <= psVarType->Columns)) - { - // Simple case: just access one component - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) - { - std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); - bformata(glsl, "%s.", instanceName.c_str()); - } - - // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. 
- if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') - isSubpassMS = true; - - if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) - { - // We'll need to add the prefix only to the last section of the name - size_t commaPos = fullName.find_last_of('.'); - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); - if (commaPos == std::string::npos) - fullName.insert(0, prefix); - else - fullName.insert(commaPos + 1, prefix); - - bformata(glsl, "%s", fullName.c_str()); - } - else - bformata(glsl, "%s", fullName.c_str()); - } - else - { - // Non-simple case: build vec4 and apply mask - - std::string instanceNamePrefix; - if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) - { - std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); - instanceNamePrefix = instanceName + "."; - } - - uint32_t i; - std::vector tmpArrayIndices; - bool tmpIsArray; - int32_t tmpRebase; - int firstItemAdded = 0; - - bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); - for (i = 0; i < 4; i++) - { - const ShaderVarType *tmpVarType = NULL; - if ((ui32CompMask & (1 << i)) == 0) - continue; - tmpRebase = 0; - if (firstItemAdded != 0) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; - - memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); - std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - // Special 
hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. - if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') - isSubpassMS = true; - - if (tmpVarType->Class == SVC_SCALAR) - { - bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); - } - else - { - uint32_t swizzle; - tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 - swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - - bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); - bformata(glsl, ".%c", "xyzw"[swizzle]); - } - } - bcatcstr(glsl, ")"); - // Clear rebase, we've already done it. - rebase = 0; - // Also swizzle. - *pui32IgnoreSwizzle = 1; - } - - if (isArray) - { - index = arrayIndices.back(); + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded + minSwiz) <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + bformata(glsl, "%s.", instanceName.c_str()); + } + + // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. 
+ if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') + isSubpassMS = true; + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + + bformata(glsl, "%s", fullName.c_str()); + } + else + bformata(glsl, "%s", fullName.c_str()); + } + else + { + // Non-simple case: build vec4 and apply mask + + std::string instanceNamePrefix; + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + instanceNamePrefix = instanceName + "."; + } + + uint32_t i; + std::vector tmpArrayIndices; + bool tmpIsArray; + int32_t tmpRebase; + int firstItemAdded = 0; + + bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + // Special 
hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. + if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') + isSubpassMS = true; + + if (tmpVarType->Class == SVC_SCALAR) + { + bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); + } + else + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + + bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); + bformata(glsl, ".%c", "xyzw"[swizzle]); + } + } + bcatcstr(glsl, ")"); + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; + } + + if (isArray) + { + index = arrayIndices.back(); // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); @@ -1164,659 +1174,671 @@ void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperan if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) { // Special handling for old matrix arrays - bformata(glsl, "%%s / 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + bformata(glsl, "%s%s / 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); bformata(glsl, "%s%s %% 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); } else // This path is atm the default { - if(isSubpassMS) - bformata(glsl, "%s%s%s", " ", fullIndexOss.str().c_str(), ")"); - else - bformata(glsl, "%s%s%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + if (isSubpassMS) + 
bformata(glsl, "%s%s%s", " ", fullIndexOss.str().c_str(), ")"); + else + bformata(glsl, "%s%s%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + } + } + } + + if (psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + { + switch (rebase) + { + case 4: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) + bcatcstr(glsl, ".xxyx"); + } + else if (psVarType->Columns == 3) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) + bcatcstr(glsl, ".xxyz"); + } + break; + } + case 8: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) + bcatcstr(glsl, ".xxxy"); + } + break; + } + case 0: + default: + { + //No rebase, but extend to vec4 if needed + uint32_t maxComp = psOperand->GetMaxComponent(); + if (psVarType->Columns == 2 && maxComp > 2) + { + bcatcstr(glsl, ".xyxx"); + } + else if (psVarType->Columns == 3 && maxComp > 3) + { + bcatcstr(glsl, ".xyzx"); + } + break; } } } - - if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) - { - switch(rebase) - { - case 4: - { - if(psVarType->Columns == 2) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) - bcatcstr(glsl, ".xxyx"); - } - else if(psVarType->Columns == 3) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) - bcatcstr(glsl, ".xxyz"); - } - break; - } - case 8: - { - if(psVarType->Columns == 2) - { - //.x(GLSL) is .z(HLSL). 
.y(GLSL) is .w(HLSL) - bcatcstr(glsl, ".xxxy"); - } - break; - } - case 0: - default: - { - //No rebase, but extend to vec4 if needed - uint32_t maxComp = psOperand->GetMaxComponent(); - if(psVarType->Columns == 2 && maxComp > 2) - { - bcatcstr(glsl, ".xyxx"); - } - else if(psVarType->Columns == 3 && maxComp > 3) - { - bcatcstr(glsl, ".xyzx"); - } - break; - } - - } - } - - if(psVarType && psVarType->Class == SVC_SCALAR) - { - *pui32IgnoreSwizzle = 1; - } - } - break; - } - case OPERAND_TYPE_RESOURCE: - { - ResourceName(glsl, psContext, RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_SAMPLER: - { - bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_FUNCTION_BODY: - { - const uint32_t ui32FuncBody = psOperand->ui32RegisterNumber; - const uint32_t ui32FuncTable = psContext->psShader->aui32FuncBodyToFuncTable[ui32FuncBody]; - //const uint32_t ui32FuncPointer = psContext->psShader->aui32FuncTableToFuncPointer[ui32FuncTable]; - const uint32_t ui32ClassType = psContext->psShader->sInfo.aui32TableIDToTypeID[ui32FuncTable]; - const char* ClassTypeName = &psContext->psShader->sInfo.psClassTypes[ui32ClassType].name[0]; - const uint32_t ui32UniqueClassFuncIndex = psContext->psShader->ui32NextClassFuncName[ui32ClassType]++; - - bformata(glsl, "%s_Func%d", ClassTypeName, ui32UniqueClassFuncIndex); - break; - } - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: - { - bcatcstr(glsl, "phaseInstanceID"); // Not a real builtin, but passed as a function parameter. 
- *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: - { - if (psContext->IsVulkan()) - { - bformata(glsl, "ImmCB_%d", psContext->currentPhase); - TranslateOperandIndex(psOperand, 0); - } - else - { - int squareBracketType = HaveDynamicIndexing(psContext, psOperand); - - bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); - if (psOperand->m_SubOperands[0].get()) - { - bformata(glsl, "%s", squareBrackets[squareBracketType][0]); //Indexes must be integral. Offset is already taken care of above. - TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - bformata(glsl, "%s", squareBrackets[squareBracketType][1]); - } - if (psOperand->m_Size == 1) - *pui32IgnoreSwizzle = 1; - } - break; - } - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - { - bcatcstr(glsl, "gl_TessCoord"); - break; - } - case OPERAND_TYPE_INPUT_CONTROL_POINT: - { - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - - if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) - { - bcatcstr(glsl, "gl_in"); - TranslateOperandIndex(psOperand, 0);//Vertex index - bcatcstr(glsl, ".gl_Position"); - } - else - { - std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - bformata(glsl, "%s", name.c_str()); - TranslateOperandIndex(psOperand, 0);//Vertex index - - // Check for scalar - if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - break; - } - case OPERAND_TYPE_NULL: - { - // Null register, used to discard results of operations - bcatcstr(glsl, 
"//null"); - break; - } - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - { - bcatcstr(glsl, "gl_InvocationID"); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - bcatcstr(glsl, "gl_SampleMask[0]"); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_COVERAGE_MASK: - { - bcatcstr(glsl, "gl_SampleMaskIn[0]"); - //Skip swizzle on scalar types. - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID - { - bcatcstr(glsl, "gl_GlobalInvocationID"); - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID - { - bcatcstr(glsl, "gl_LocalInvocationID"); - break; - } - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID - { - bcatcstr(glsl, "gl_WorkGroupID"); - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex - { - bcatcstr(glsl, "gl_LocalInvocationIndex"); - *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. - break; - } - case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: - { - ResourceName(glsl, psContext, RGROUP_UAV, psOperand->ui32RegisterNumber, 0); - break; - } - case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: - { - bformata(glsl, "TGSM%d", psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_PRIMITIVEID: - { - if(psContext->psShader->eShaderType == GEOMETRY_SHADER) - bcatcstr(glsl, "gl_PrimitiveIDIn"); // LOL OpenGL - else - bcatcstr(glsl, "gl_PrimitiveID"); - - break; - } - case OPERAND_TYPE_INDEXABLE_TEMP: - { - bformata(glsl, "TempArray%d", psOperand->aui32ArraySizes[0]); - bcatcstr(glsl, "["); - if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) - bformata(glsl, "%d", psOperand->aui32ArraySizes[1]); - - if(psOperand->m_SubOperands[1].get()) - { - if (psOperand->aui32ArraySizes[1] != 0) - bcatcstr(glsl, "+"); - TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); - - } - bcatcstr(glsl, "]"); - break; - } - case OPERAND_TYPE_STREAM: - { 
- bformata(glsl, "%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: - { - // In HLSL the instance id is uint, so cast here. - bcatcstr(glsl, "uint(gl_InvocationID)"); - break; - } - case OPERAND_TYPE_THIS_POINTER: - { - /* - The "this" register is a register that provides up to 4 pieces of information: - X: Which CB holds the instance data - Y: Base element offset of the instance data within the instance CB - Z: Base sampler index - W: Base Texture index - - Can be different for each function call - */ - break; - } - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - { - const ShaderInfo::InOutSignature* psIn; - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); - *piRebase = psIn->iRebase; - switch (psIn->eSystemValueType) - { - case NAME_POSITION: - bcatcstr(glsl, "gl_Position"); - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - bcatcstr(glsl, "gl_Layer"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_CLIP_DISTANCE: - bcatcstr(glsl, "gl_ClipDistance"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_CULL_DISTANCE: - bcatcstr(glsl, "gl_CullDistance"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_VIEWPORT_ARRAY_INDEX: - bcatcstr(glsl, "gl_ViewportIndex"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_VERTEX_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - bcatcstr(glsl, "gl_VertexIndex"); - else - bcatcstr(glsl, "gl_VertexID"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_INSTANCE_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - bcatcstr(glsl, "gl_InstanceIndex"); - else - bcatcstr(glsl, "gl_InstanceID"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_IS_FRONT_FACE: - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); - else - bcatcstr(glsl, "(gl_FrontFacing ? 
int(1) : int(0))"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_PRIMITIVE_ID: - bcatcstr(glsl, "gl_PrimitiveID"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - bcatcstr(glsl, "gl_TessLevelOuter"); - else - bcatcstr(glsl, "gl_TessLevelOuter[0]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelOuter[1]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelOuter[2]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelOuter[3]"); - *pui32IgnoreSwizzle = 1; - break; - - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - bcatcstr(glsl, "gl_TessLevelInner"); - else - bcatcstr(glsl, "gl_TessLevelInner[0]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelInner[1]"); - *pui32IgnoreSwizzle = 1; - break; - default: - bformata(glsl, "%spatch%s%d", psContext->psShader->eShaderType == HULL_SHADER ? 
psContext->outputPrefix : psContext->inputPrefix, psIn->semanticName.c_str(), psIn->ui32SemanticIndex); - // Disable swizzles if this is a scalar - if (psContext->psShader->eShaderType == HULL_SHADER) - { - if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - else - { - if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - - break; - } - - - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (hasCtor && (*pui32IgnoreSwizzle == 0)) - { - TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); - *pui32IgnoreSwizzle = 1; - } - - if (needsBoolUpscale) - { - if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) - bcatcstr(glsl, ") * 0xffffffffu"); - else - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, ") * int(0xffffffffu)"); - else - bcatcstr(glsl, ") * int(0xffffffff)"); - } - - numParenthesis--; - } - - while (numParenthesis != 0) - { - bcatcstr(glsl, ")"); - numParenthesis--; - } + + if (psVarType && psVarType->Class == SVC_SCALAR) + { + *pui32IgnoreSwizzle = 1; + } + } + break; + } + case OPERAND_TYPE_RESOURCE: + { + ResourceName(glsl, psContext, RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_SAMPLER: + { + bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_FUNCTION_BODY: + { + const uint32_t ui32FuncBody = psOperand->ui32RegisterNumber; + const uint32_t ui32FuncTable = psContext->psShader->aui32FuncBodyToFuncTable[ui32FuncBody]; + //const uint32_t ui32FuncPointer = psContext->psShader->aui32FuncTableToFuncPointer[ui32FuncTable]; + const uint32_t ui32ClassType = psContext->psShader->sInfo.aui32TableIDToTypeID[ui32FuncTable]; + 
const char* ClassTypeName = &psContext->psShader->sInfo.psClassTypes[ui32ClassType].name[0]; + const uint32_t ui32UniqueClassFuncIndex = psContext->psShader->ui32NextClassFuncName[ui32ClassType]++; + + bformata(glsl, "%s_Func%d", ClassTypeName, ui32UniqueClassFuncIndex); + break; + } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: + { + bcatcstr(glsl, "phaseInstanceID"); // Not a real builtin, but passed as a function parameter. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + { + if (psContext->IsVulkan() || psContext->IsSwitch()) + { + bformata(glsl, "ImmCB_%d", psContext->currentPhase); + TranslateOperandIndex(psOperand, 0); + } + else + { + int squareBracketType = HaveDynamicIndexing(psContext, psOperand); + + bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); + if (psOperand->m_SubOperands[0].get()) + { + bformata(glsl, "%s", squareBrackets[squareBracketType][0]); //Indexes must be integral. Offset is already taken care of above. 
+ TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bformata(glsl, "%s", squareBrackets[squareBracketType][1]); + } + if (psOperand->m_Size == 1) + *pui32IgnoreSwizzle = 1; + } + break; + } + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + { + bcatcstr(glsl, "gl_TessCoord"); + break; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + { + bcatcstr(glsl, "gl_in"); + TranslateOperandIndex(psOperand, 0);//Vertex index + bcatcstr(glsl, ".gl_Position"); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + bformata(glsl, "%s", name.c_str()); + TranslateOperandIndex(psOperand, 0);//Vertex index + + // Check for scalar + if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + break; + } + case OPERAND_TYPE_NULL: + { + // Null register, used to discard results of operations + bcatcstr(glsl, "//null"); + break; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + bcatcstr(glsl, "gl_InvocationID"); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + bcatcstr(glsl, "gl_SampleMask[0]"); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + { + bcatcstr(glsl, "gl_SampleMaskIn[0]"); + //Skip swizzle on scalar types. 
+ *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID + { + bcatcstr(glsl, "gl_GlobalInvocationID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID + { + bcatcstr(glsl, "gl_LocalInvocationID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID + { + bcatcstr(glsl, "gl_WorkGroupID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex + { + if (requestedComponents > 1 && !hasCtor) + { + bcatcstr(glsl, GetConstructorForType(psContext, eType, requestedComponents, false)); + bcatcstr(glsl, "("); + numParenthesis++; + hasCtor = 1; + } + + for (uint32_t i = 0; i < requestedComponents; i++) + { + bcatcstr(glsl, "gl_LocalInvocationIndex"); + if (i < requestedComponents - 1) + bcatcstr(glsl, ", "); + } + *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. + break; + } + case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: + { + ResourceName(glsl, psContext, RGROUP_UAV, psOperand->ui32RegisterNumber, 0); + break; + } + case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: + { + bformata(glsl, "TGSM%d", psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + if (psContext->psShader->eShaderType == GEOMETRY_SHADER) + bcatcstr(glsl, "gl_PrimitiveIDIn"); // LOL OpenGL + else + bcatcstr(glsl, "gl_PrimitiveID"); + + break; + } + case OPERAND_TYPE_INDEXABLE_TEMP: + { + bformata(glsl, "TempArray%d", psOperand->aui32ArraySizes[0]); + bcatcstr(glsl, "["); + if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) + bformata(glsl, "%d", psOperand->aui32ArraySizes[1]); + + if (psOperand->m_SubOperands[1].get()) + { + if (psOperand->aui32ArraySizes[1] != 0) + bcatcstr(glsl, "+"); + TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + } + bcatcstr(glsl, "]"); + break; + } + case OPERAND_TYPE_STREAM: + { + bformata(glsl, "%d", psOperand->ui32RegisterNumber); + break; + } + case 
OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + // In HLSL the instance id is uint, so cast here. + bcatcstr(glsl, "uint(gl_InvocationID)"); + break; + } + case OPERAND_TYPE_THIS_POINTER: + { + /* + The "this" register is a register that provides up to 4 pieces of information: + X: Which CB holds the instance data + Y: Base element offset of the instance data within the instance CB + Z: Base sampler index + W: Base Texture index + + Can be different for each function call + */ + break; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + const ShaderInfo::InOutSignature* psIn; + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + *piRebase = psIn->iRebase; + switch (psIn->eSystemValueType) + { + case NAME_POSITION: + bcatcstr(glsl, "gl_Position"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + bcatcstr(glsl, "gl_Layer"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_CLIP_DISTANCE: + bcatcstr(glsl, "gl_ClipDistance"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_CULL_DISTANCE: + bcatcstr(glsl, "gl_CullDistance"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_VIEWPORT_ARRAY_INDEX: + bcatcstr(glsl, "gl_ViewportIndex"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_VERTEX_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + bcatcstr(glsl, "gl_VertexIndex"); + else + bcatcstr(glsl, "gl_VertexID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_INSTANCE_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + bcatcstr(glsl, "gl_InstanceIndex"); + else + bcatcstr(glsl, "gl_InstanceID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_IS_FRONT_FACE: + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Old ES3.0 Adrenos treat 0u as const int + else + bcatcstr(glsl, "(gl_FrontFacing ? 
1 : 0)"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_PRIMITIVE_ID: + bcatcstr(glsl, "gl_PrimitiveID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + bcatcstr(glsl, "gl_TessLevelOuter"); + else + bcatcstr(glsl, "gl_TessLevelOuter[0]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[1]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[2]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[3]"); + *pui32IgnoreSwizzle = 1; + break; + + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + bcatcstr(glsl, "gl_TessLevelInner"); + else + bcatcstr(glsl, "gl_TessLevelInner[0]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelInner[1]"); + *pui32IgnoreSwizzle = 1; + break; + default: + bformata(glsl, "%spatch%s%d", psContext->psShader->eShaderType == HULL_SHADER ? 
psContext->outputPrefix : psContext->inputPrefix, psIn->semanticName.c_str(), psIn->ui32SemanticIndex); + // Disable swizzles if this is a scalar + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + else + { + if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + + break; + } + + + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (hasCtor && (*pui32IgnoreSwizzle == 0)) + { + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBoolUpscale) + { + if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) + bcatcstr(glsl, ") * 0xffffffffu"); + else + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") * int(0xffffffffu)"); + else + bcatcstr(glsl, ") * int(0xffff)"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16) + } + + numParenthesis--; + bcatcstr(glsl, ")"); + numParenthesis--; + } + + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } } void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) { - TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask); + TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask); } void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) { - uint32_t ui32IgnoreSwizzle = 0; - int iRebase = 0; - - // in single-component mode there is no need to use mask - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; - - 
if(psContext->psShader->ui32MajorVersion <=3) - { - ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT|TO_AUTO_BITCAST_TO_INT|TO_AUTO_BITCAST_TO_UINT); - } - - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER)) - { - ui32TOFlag &= ~TO_FLAG_UNSIGNED_INTEGER; - ui32TOFlag |= TO_FLAG_INTEGER; - } - - if(ui32TOFlag & TO_FLAG_NAME_ONLY) - { - TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); - return; - } - - switch(psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - case OPERAND_MODIFIER_NEG: - { - bcatcstr(glsl, "(-"); - break; - } - case OPERAND_MODIFIER_ABS: - { - bcatcstr(glsl, "abs("); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - bcatcstr(glsl, "-abs("); - break; - } - } - - TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); - - if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT && - psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE - && psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) - { - bcatcstr(glsl, "[gl_InvocationID]"); - } - - if(!ui32IgnoreSwizzle) - { - TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32ComponentMask, iRebase); - } - - switch(psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - case OPERAND_MODIFIER_NEG: - { - bcatcstr(glsl, ")"); - break; - } - case OPERAND_MODIFIER_ABS: - { - bcatcstr(glsl, ")"); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - bcatcstr(glsl, ")"); - break; - } - } - + uint32_t ui32IgnoreSwizzle = 0; + int iRebase = 0; + + // in single-component mode there is no need to use mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; + + if 
(psContext->psShader->ui32MajorVersion <= 3) + { + ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_BITCAST_TO_INT | TO_AUTO_BITCAST_TO_UINT); + } + + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER)) + { + ui32TOFlag &= ~TO_FLAG_UNSIGNED_INTEGER; + ui32TOFlag |= TO_FLAG_INTEGER; + } + + if (ui32TOFlag & TO_FLAG_NAME_ONLY) + { + TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); + return; + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + bcatcstr(glsl, "(-"); + break; + } + case OPERAND_MODIFIER_ABS: + { + bcatcstr(glsl, "abs("); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + bcatcstr(glsl, "-abs("); + break; + } + } + + TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); + + if (psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT && + psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE + && psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + bcatcstr(glsl, "[gl_InvocationID]"); + } + + if (!ui32IgnoreSwizzle) + { + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32ComponentMask, iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + bcatcstr(glsl, ")"); + break; + } + case OPERAND_MODIFIER_ABS: + { + bcatcstr(glsl, ")"); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + bcatcstr(glsl, ")"); + break; + } + } } std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) { - std::ostringstream oss; - const ResourceBinding* psBinding = 0; - int found; - - found = 
psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); - - if(bZCompare) - { - oss << "hlslcc_zcmp"; - } - - if(found) - { - int i = 0; - std::string name = psBinding->name; - uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; - - while(i < name.length()) - { - //array syntax [X] becomes _0_ - //Otherwise declarations could end up as: - //uniform sampler2D SomeTextures[0]; - //uniform sampler2D SomeTextures[1]; - if(name[i] == '[' || name[i] == ']') - name[i] = '_'; - - ++i; - } - - if(ui32ArrayOffset) - { - oss << name << ui32ArrayOffset; - } - else - { - oss << name; - } - if (psContext->IsVulkan() && group == RGROUP_UAV) - oss << "_origX" << ui32RegisterNumber << "X"; - - } - else - { - oss << "UnknownResource" << ui32RegisterNumber; - } - std::string res = oss.str(); - // Prefix sampler names with 'sampler' unless it already starts with it - if (group == RGROUP_SAMPLER) - { - if (strncmp(res.c_str(), "sampler", 7) != 0) - res.insert(0, "sampler"); - } - - return res; + std::ostringstream oss; + const ResourceBinding* psBinding = 0; + int found; + + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); + + if (bZCompare) + { + oss << "hlslcc_zcmp"; + } + + if (found) + { + int i = 0; + std::string name = psBinding->name; + uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; + + while (i < name.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (name[i] == '[' || name[i] == ']') + name[i] = '_'; + + ++i; + } + + if (ui32ArrayOffset) + { + oss << name << ui32ArrayOffset; + } + else + { + oss << name; + } + if (psContext->IsVulkan() && group == RGROUP_UAV) + oss << "_origX" << ui32RegisterNumber << "X"; + } + else + { + oss << "UnknownResource" << ui32RegisterNumber; + } + std::string res = oss.str(); + // Prefix 
sampler names with 'sampler' unless it already starts with it + if (group == RGROUP_SAMPLER) + { + if (strncmp(res.c_str(), "sampler", 7) != 0) + res.insert(0, "sampler"); + } + + return res; } + void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) { - bstring glsl = (targetStr == NULL) ? *psContext->currentGLSLString : targetStr; - std::string res = ResourceName(psContext, group, ui32RegisterNumber, bZCompare); - bcatcstr(glsl, res.c_str()); + bstring glsl = (targetStr == NULL) ? *psContext->currentGLSLString : targetStr; + std::string res = ResourceName(psContext, group, ui32RegisterNumber, bZCompare); + bcatcstr(glsl, res.c_str()); } std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) { - std::ostringstream oss; - const ResourceBinding* psTextureBinding = 0; - const ResourceBinding* psSamplerBinding = 0; - int foundTexture, foundSampler; - uint32_t i = 0; - uint32_t ui32ArrayOffset; - - foundTexture = psShaderInfo->GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32TextureRegisterNumber, &psTextureBinding); - foundSampler = psShaderInfo->GetResourceFromBindingPoint(RGROUP_SAMPLER, ui32SamplerRegisterNumber, &psSamplerBinding); - - if (!foundTexture || !foundSampler) - { - oss << "UnknownResource" << ui32TextureRegisterNumber << "_" << ui32SamplerRegisterNumber; - return oss.str(); - } - - ui32ArrayOffset = ui32TextureRegisterNumber - psTextureBinding->ui32BindPoint; - - std::string texName = psTextureBinding->name; - - while (i < texName.length()) - { - //array syntax [X] becomes _0_ - //Otherwise declarations could end up as: - //uniform sampler2D SomeTextures[0]; - //uniform sampler2D SomeTextures[1]; - if(texName[i] == '[' || texName[i] == ']') - { - texName[i] = '_'; - } - - ++i; - } - - - if(bZCompare) - { - oss << "hlslcc_zcmp"; - } - - - 
if(ui32ArrayOffset) - { - oss << texName << ui32ArrayOffset << "TEX_with_SMP" << psSamplerBinding->name; - } - else - { - oss << texName << "TEX_with_SMP" << psSamplerBinding->name; - } - - return oss.str(); + std::ostringstream oss; + const ResourceBinding* psTextureBinding = 0; + const ResourceBinding* psSamplerBinding = 0; + int foundTexture, foundSampler; + uint32_t i = 0; + uint32_t ui32ArrayOffset; + + foundTexture = psShaderInfo->GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32TextureRegisterNumber, &psTextureBinding); + foundSampler = psShaderInfo->GetResourceFromBindingPoint(RGROUP_SAMPLER, ui32SamplerRegisterNumber, &psSamplerBinding); + + if (!foundTexture || !foundSampler) + { + oss << "UnknownResource" << ui32TextureRegisterNumber << "_" << ui32SamplerRegisterNumber; + return oss.str(); + } + + ui32ArrayOffset = ui32TextureRegisterNumber - psTextureBinding->ui32BindPoint; + + std::string texName = psTextureBinding->name; + + while (i < texName.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (texName[i] == '[' || texName[i] == ']') + { + texName[i] = '_'; + } + + ++i; + } + + + if (bZCompare) + { + oss << "hlslcc_zcmp"; + } + + + if (ui32ArrayOffset) + { + oss << texName << ui32ArrayOffset << "TEX_with_SMP" << psSamplerBinding->name; + } + else + { + oss << texName << "TEX_with_SMP" << psSamplerBinding->name; + } + + return oss.str(); } void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) { - std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); - bcatcstr(str, texturesamplername.c_str()); + std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); + 
bcatcstr(str, texturesamplername.c_str()); } // Take an uniform buffer name and generate an instance name. std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name) { - if (name == "$Globals") - { - char prefix = 'A'; - // Need to tweak Globals struct name to prevent clashes between shader stages - switch (psContext->psShader->eShaderType) - { - default: - ASSERT(0); - break; - case COMPUTE_SHADER: - prefix = 'C'; - break; - case VERTEX_SHADER: - prefix = 'V'; - break; - case PIXEL_SHADER: - prefix = 'P'; - break; - case GEOMETRY_SHADER: - prefix = 'G'; - break; - case HULL_SHADER: - prefix = 'H'; - break; - case DOMAIN_SHADER: - prefix = 'D'; - break; - } - - return std::string("_") + prefix + name.substr(1); - } - else - return std::string("_") + name; + if (name == "$Globals") + { + char prefix = 'A'; + // Need to tweak Globals struct name to prevent clashes between shader stages + switch (psContext->psShader->eShaderType) + { + default: + ASSERT(0); + break; + case COMPUTE_SHADER: + prefix = 'C'; + break; + case VERTEX_SHADER: + prefix = 'V'; + break; + case PIXEL_SHADER: + prefix = 'P'; + break; + case GEOMETRY_SHADER: + prefix = 'G'; + break; + case HULL_SHADER: + prefix = 'H'; + break; + case DOMAIN_SHADER: + prefix = 'D'; + break; + } + + return std::string("_") + prefix + name.substr(1); + } + else + return std::string("_") + name; } diff --git a/src/toMetal.cpp b/src/toMetal.cpp index e8d1cff..b695a77 100644 --- a/src/toMetal.cpp +++ b/src/toMetal.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/toMetal.h" #include "internal_includes/HLSLCrossCompilerContext.h" #include "internal_includes/Shader.h" @@ -12,858 +11,892 @@ static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs) { - StructDefinition &d = defs[sname]; - if (d.m_IsPrinted) - return; - d.m_IsPrinted = true; - - - std::for_each(d.m_Dependencies.begin(), d.m_Dependencies.end(), 
[&psContext, &glsl, &defs](std::string &depName) - { - PrintStructDeclaration(psContext, glsl, depName, defs); - }); - - bformata(glsl, "struct %s\n{\n", sname.c_str()); - psContext->indent++; - std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](const MemberDefinitions::value_type &mem) - { - psContext->AddIndentation(); - bcatcstr(glsl, mem.second.c_str()); - bcatcstr(glsl, ";\n"); - }); - - psContext->indent--; - bcatcstr(glsl, "};\n\n"); + StructDefinition &d = defs[sname]; + if (d.m_IsPrinted) + return; + d.m_IsPrinted = true; + + + std::for_each(d.m_Dependencies.begin(), d.m_Dependencies.end(), [&psContext, &glsl, &defs](std::string &depName) + { + PrintStructDeclaration(psContext, glsl, depName, defs); + }); + + bformata(glsl, "struct %s\n{\n", sname.c_str()); + psContext->indent++; + std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](const MemberDefinitions::value_type &mem) + { + psContext->AddIndentation(); + bcatcstr(glsl, mem.second.c_str()); + bcatcstr(glsl, ";\n"); + }); + + psContext->indent--; + bcatcstr(glsl, "};\n\n"); } void ToMetal::PrintStructDeclarations(StructDefinitions &defs, const char *name) { - bstring glsl = *psContext->currentGLSLString; - StructDefinition &args = defs[name]; - std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname) - { - PrintStructDeclaration(psContext, glsl, sname, defs); - }); - + bstring glsl = *psContext->currentGLSLString; + StructDefinition &args = defs[name]; + std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname) + { + PrintStructDeclaration(psContext, glsl, sname, defs); + }); } static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) { - switch (eType) - { - default: - case MAIN_PHASE: return ""; - case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; - case HS_FORK_PHASE: return "fork_phase"; - case HS_CTRL_POINT_PHASE: return "control_point_phase"; 
- case HS_JOIN_PHASE: return "join_phase"; - } + switch (eType) + { + default: + case MAIN_PHASE: return ""; + case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; + case HS_FORK_PHASE: return "fork_phase"; + case HS_CTRL_POINT_PHASE: return "control_point_phase"; + case HS_JOIN_PHASE: return "join_phase"; + } } static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) { - uint32_t i; - bstring glsl = *psContext->currentGLSLString; + uint32_t i; + bstring glsl = *psContext->currentGLSLString; - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - psContext->AddIndentation(); - bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - } + psContext->AddIndentation(); + bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } } bool ToMetal::Translate() { - bstring glsl; - uint32_t i; - Shader* psShader = psContext->psShader; - uint32_t ui32Phase; - - psContext->psTranslator = this; - - SetIOPrefixes(); - psShader->ExpandSWAPCs(); - psShader->ForcePositionToHighp(); - psShader->AnalyzeIOOverlap(); - psShader->FindUnusedGlobals(psContext->flags); - - psContext->indent = 0; - - glsl = bfromcstralloc(1024 * 10, ""); - bstring bodyglsl = bfromcstralloc(1024 * 10, ""); - - psContext->glsl = glsl; - for (i = 0; i < psShader->asPhases.size(); ++i) - { - psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); - 
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); - } - - psContext->currentGLSLString = &glsl; - psShader->eTargetLanguage = LANG_METAL; - psShader->extensions = NULL; - psContext->currentPhase = MAIN_PHASE; - - psContext->ClearDependencyData(); - - const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; - uint32_t ui32PhaseCallIndex; - int hasControlPointPhase = 0; - - const int maxThreadsPerThreadGroup = 32; - int numPatchesInThreadGroup = 0; - bool hasControlPoint = false; - bool hasPatchConstant = false; - std::string tessVertexFunctionArguments; - - if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - if (psContext->psDependencies) - { - m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers; - m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots; - m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots; - m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots; - hasControlPoint = psContext->psDependencies->hasControlPoint; - hasPatchConstant = psContext->psDependencies->hasPatchConstant; - } - } - - ClampPartialPrecisions(); - - for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase &phase = psShader->asPhases[ui32Phase]; - phase.UnvectorizeImmMoves(); - psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(); - ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan - HLSLcc::DoLoopTransform(psContext, phase); - } - - psShader->PruneTempRegisters(); - - //Special case. Can have multiple phases. 
- if(psShader->eShaderType == HULL_SHADER) - { - psShader->ConsolidateHullTempVars(); - - // Find out if we have a passthrough hull shader - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - hasControlPointPhase = 1; - } - } - - // Hull and Domain shaders get merged into vertex shader output - if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)) - { - if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH) - bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n"); - bcatcstr(glsl, "#include \n#include \nusing namespace metal;\n"); - bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n"); - } - - if (psShader->eShaderType == HULL_SHADER) - { - psContext->indent++; - - // Phase 1 is always the global decls phase, no instructions - for(i=0; i < psShader->asPhases[1].psDecl.size(); ++i) - { - TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); - } - - if (hasControlPointPhase == 0) - { - DeclareHullShaderPassthrough(); - } - - for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) - { - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) - continue; - psContext->currentPhase = ui32Phase; + bstring glsl; + uint32_t i; + Shader* psShader = psContext->psShader; + uint32_t ui32Phase; + + psContext->psTranslator = this; + + SetIOPrefixes(); + psShader->ExpandSWAPCs(); + psShader->ForcePositionToHighp(); + psShader->AnalyzeIOOverlap(); + psShader->FindUnusedGlobals(psContext->flags); + + psContext->indent = 0; + + glsl = bfromcstralloc(1024 * 10, ""); + bstring bodyglsl = bfromcstralloc(1024 * 10, ""); + + psContext->glsl = glsl; + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); + 
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); + } + + psContext->currentGLSLString = &glsl; + psShader->eTargetLanguage = LANG_METAL; + psShader->extensions = NULL; + psContext->currentPhase = MAIN_PHASE; + + psContext->ClearDependencyData(); + + const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; + uint32_t ui32PhaseCallIndex; + int hasControlPointPhase = 0; + + const int maxThreadsPerThreadGroup = 32; + int numPatchesInThreadGroup = 0; + bool hasControlPoint = false; + bool hasPatchConstant = false; + std::string tessVertexFunctionArguments; + + if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + if (psContext->psDependencies) + { + m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers; + m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots; + m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots; + m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots; + hasControlPoint = psContext->psDependencies->hasControlPoint; + hasPatchConstant = psContext->psDependencies->hasPatchConstant; + } + } + + ClampPartialPrecisions(); + + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + phase.UnvectorizeImmMoves(); + psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(); + ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan + HLSLcc::DoLoopTransform(psContext, phase); + } + + psShader->PruneTempRegisters(); + + //Special case. Can have multiple phases. 
+ if (psShader->eShaderType == HULL_SHADER) + { + psShader->ConsolidateHullTempVars(); + + // Find out if we have a passthrough hull shader + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + hasControlPointPhase = 1; + } + } + + // Hull and Domain shaders get merged into vertex shader output + if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)) + { + if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH) + bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n"); + bcatcstr(glsl, "#include \n#include \nusing namespace metal;\n"); + bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n"); + } + + if (psShader->eShaderType == HULL_SHADER) + { + psContext->indent++; + + // Phase 1 is always the global decls phase, no instructions + for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); + } + + if (hasControlPointPhase == 0) + { + DeclareHullShaderPassthrough(); + } + + for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + psContext->currentPhase = ui32Phase; #ifdef _DEBUG - // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); + // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); #endif - for (i = 0; i < psPhase->psDecl.size(); ++i) - { - TranslateDeclaration(&psPhase->psDecl[i]); - } - } - } - - psContext->indent--; - - numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount); - } - else - { - for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) - 
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); - - // Output default implementations for framebuffer index remap if needed - if (m_NeedFBOutputRemapDecl) - bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n"); - if (m_NeedFBInputRemapDecl) - bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n"); - - DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size()); - GenerateTexturesReflection(&psContext->m_Reflection); - } - - if (psShader->eShaderType == HULL_SHADER) - { - psContext->currentPhase = MAIN_PHASE; - - if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0) - { - hasControlPoint = true; - - m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint"); - m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn"); - } - - if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0) - { - hasPatchConstant = true; - - m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant"); - m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn"); - } - - m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches")); - m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch")); - - if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0) - { - 
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo"); - } - - if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) - { - m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName()); - - // Hack, we're reusing Mtl_VertexOut as an hull shader input array, so no need to declare original contents - m_StructDefinitions[GetInputStructName()].m_Members.clear(); - - bstring vertexOut = bfromcstr(""); - bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount); - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *) vertexOut->data)); - bdestroy(vertexOut); - } - - if(psContext->psDependencies) - { - for (auto itr = psContext->psDependencies->m_SharedFunctionMembers.begin(); itr != psContext->psDependencies->m_SharedFunctionMembers.end(); itr++) - { - tessVertexFunctionArguments += itr->first.c_str(); - tessVertexFunctionArguments += ", "; - } - } - } - - if (psShader->eShaderType == DOMAIN_SHADER) - { - // For preserving data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader - if (hasControlPoint) - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point cp")); - if (hasPatchConstant) - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch")); - } - - if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - if (psContext->psDependencies) - { - psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members; - psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots; - psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount(); - 
psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots; - psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount(); - psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots; - psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount(); - } - } - - if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) - { - if (psShader->eShaderType == HULL_SHADER) - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]")); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]")); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]")); - - bstring buffer = bfromcstr(""); - uint32_t slot = 0; - - if (hasControlPoint) - { - slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *) buffer->data)); - btrunc(buffer, 0); - } - - if (hasPatchConstant) - { - slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *) buffer->data)); - btrunc(buffer, 0); - } - - slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *) buffer->data)); - btrunc(buffer, 0); - - slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *) buffer->data)); - btrunc(buffer, 0); - - bdestroy(buffer); - } - else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input")); - } - else - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]")); - } - - if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - // m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions - std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem) - { - if (mem.first == "mtl_InstanceID") - { - if (psShader->eShaderType == VERTEX_SHADER) - mem.second.assign("uint mtl_InstanceID"); - else if (psShader->eShaderType == HULL_SHADER) - mem.second.assign("// mtl_InstanceID passed through groupID"); - } - }); - - } - - m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName()); - } - - if (psShader->eShaderType != COMPUTE_SHADER) - { - if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) - { - m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName()); - } - } - - PrintStructDeclarations(m_StructDefinitions); - - psContext->currentGLSLString = &bodyglsl; - - bool popPragmaDiagnostic = false; - if 
(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) - { - popPragmaDiagnostic = true; - - bcatcstr(bodyglsl, "#pragma clang diagnostic push\n"); - bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n"); - } - - switch (psShader->eShaderType) - { - case VERTEX_SHADER: - if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0) - bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); - else - bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n"); - break; - case PIXEL_SHADER: - if (psShader->sInfo.bEarlyFragmentTests) - bcatcstr(bodyglsl, "[[early_fragment_tests]]\n"); - bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n"); - break; - case COMPUTE_SHADER: - bcatcstr(bodyglsl, "kernel void computeMain(\n"); - break; - case HULL_SHADER: - bcatcstr(bodyglsl, "kernel void patchKernel(\n"); - break; - case DOMAIN_SHADER: - { - const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "quad" : "triangle"; - uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount; - bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount); - break; - } - default: - // Not supported - ASSERT(0); - return false; - } - - psContext->indent++; - for (auto itr = m_StructDefinitions[""].m_Members.begin(); ;) - { - if (itr == m_StructDefinitions[""].m_Members.end()) - break; - - psContext->AddIndentation(); - bcatcstr(bodyglsl, itr->second.c_str()); - - itr++; - if (itr != m_StructDefinitions[""].m_Members.end()) - bcatcstr(bodyglsl, ",\n"); - } - - bcatcstr(bodyglsl, ")\n{\n"); - - if (popPragmaDiagnostic) - bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n"); - - if (psShader->eShaderType != COMPUTE_SHADER) - { - if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, GetOutputStructName().c_str()); - bcatcstr(bodyglsl, " output;\n"); - } - } - - if 
(psShader->eShaderType == HULL_SHADER) - { - if (hasPatchConstant) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n"); - } - - psContext->AddIndentation(); - bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y;\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % numPatchesInThreadGroup);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint internalControlPointID = (mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x;\n"); - - psContext->AddIndentation(); - bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str()); - psContext->AddIndentation(); - bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str()); - - psContext->AddIndentation(); - std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf"; - bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str()); - } - - // There are cases when there are no control point phases and we have to do passthrough - if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (patchValid) {\n"); - psContext->indent++; - - // Passthrough control point phase, run the rest only once per patch - psContext->AddIndentation(); - bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%svertexInput);\n", tessVertexFunctionArguments.c_str()); - - DoHullShaderPassthrough(psContext); - - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "}\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (!patchValid) {\n"); - psContext->indent++; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "return;\n"); - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "}\n"); - } - - if (psShader->eShaderType == HULL_SHADER) - { - for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) - { - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - uint32_t i; - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) - continue; - psContext->currentPhase = ui32Phase; - - if (psPhase->earlyMain->slen > 1) - { + for (i = 0; i < psPhase->psDecl.size(); ++i) + { + TranslateDeclaration(&psPhase->psDecl[i]); + } + } + } + + psContext->indent--; + + numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount); + } + else + { + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + + 
// Output default implementations for framebuffer index remap if needed + if (m_NeedFBOutputRemapDecl) + bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n"); + if (m_NeedFBInputRemapDecl) + bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n"); + + DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size()); + GenerateTexturesReflection(&psContext->m_Reflection); + } + + if (psShader->eShaderType == HULL_SHADER) + { + psContext->currentPhase = MAIN_PHASE; + + if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0) + { + hasControlPoint = true; + + m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint"); + m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn"); + } + + if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0) + { + hasPatchConstant = true; + + m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant"); + m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant"); + PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn"); + } + + m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches")); + m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch")); + + if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0) + { + m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo"); + 
PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo"); + } + + if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) + { + m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName()); + if (psContext->psDependencies) + psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName()); + + // Hack, we're reusing Mtl_VertexOut as an hull shader input array, so no need to declare original contents + m_StructDefinitions[GetInputStructName()].m_Members.clear(); + + bstring vertexOut = bfromcstr(""); + bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount); + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *)vertexOut->data)); + bdestroy(vertexOut); + } + + if (psContext->psDependencies) + { + for (auto i = psContext->psDependencies->m_SharedFunctionMembers.begin(), in = psContext->psDependencies->m_SharedFunctionMembers.end(); i != in;) + { + tessVertexFunctionArguments += i->first.c_str(); + ++i; + + // we want to avoid trailing comma + if (i != in) + tessVertexFunctionArguments += ", "; + } + } + } + + if (psShader->eShaderType == DOMAIN_SHADER) + { + // For preserving data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader + if (hasControlPoint) + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point cp")); + if (hasPatchConstant) + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch")); + } + + if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + if (psContext->psDependencies) + { + psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members; + psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots; + 
psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount(); + psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots; + psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount(); + psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots; + psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount(); + } + } + + if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) + { + if (psShader->eShaderType == HULL_SHADER) + { + if (psContext->psDependencies) + { + // if we go for fully procedural geometry we might end up without Mtl_VertexIn + for (std::vector::const_iterator itr = psContext->psDependencies->m_SharedDependencies.begin(); itr != psContext->psDependencies->m_SharedDependencies.end(); itr++) + { + if (*itr == "Mtl_VertexIn") + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]")); + if (tessVertexFunctionArguments.length()) + tessVertexFunctionArguments += ", "; + tessVertexFunctionArguments += "vertexInput"; + break; + } + } + } + + m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]")); + + bstring buffer = bfromcstr(""); + uint32_t slot = 0; + + if (hasControlPoint) + { + slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *)buffer->data)); + btrunc(buffer, 0); + } + + if (hasPatchConstant) + { + slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot); + 
m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *)buffer->data)); + btrunc(buffer, 0); + } + + slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *)buffer->data)); + btrunc(buffer, 0); + + slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *)buffer->data)); + btrunc(buffer, 0); + + bdestroy(buffer); + } + else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input")); + } + else + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]")); + } + + m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName()); + if (psContext->psDependencies) + psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName()); + } + + if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + // m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions + std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem) + { + if (mem.first == "mtl_InstanceID") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint 
mtl_InstanceID"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_InstanceID passed through groupID"); + } + else if (mem.first == "mtl_VertexID") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_VertexID"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_VertexID generated in compute kernel"); + else if (psShader->eShaderType == DOMAIN_SHADER) + mem.second.assign("// mtl_VertexID unused"); + } + }); + } + + if (psShader->eShaderType != COMPUTE_SHADER) + { + if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) + { + m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName()); + if (psContext->psDependencies) + psContext->psDependencies->m_SharedDependencies.push_back(GetOutputStructName()); + } + } + + PrintStructDeclarations(m_StructDefinitions); + + psContext->currentGLSLString = &bodyglsl; + + bool popPragmaDiagnostic = false; + if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + popPragmaDiagnostic = true; + + bcatcstr(bodyglsl, "#pragma clang diagnostic push\n"); + bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n"); + } + + switch (psShader->eShaderType) + { + case VERTEX_SHADER: + if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0) + bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); + else + bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n"); + break; + case PIXEL_SHADER: + if (psShader->sInfo.bEarlyFragmentTests) + bcatcstr(bodyglsl, "[[early_fragment_tests]]\n"); + if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) + bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n"); + else + bcatcstr(bodyglsl, "fragment void xlatMtlMain(\n"); + break; + case COMPUTE_SHADER: + bcatcstr(bodyglsl, "kernel void computeMain(\n"); + break; + case HULL_SHADER: + bcatcstr(bodyglsl, "kernel void patchKernel(\n"); + break; + case DOMAIN_SHADER: 
+ { + const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "quad" : "triangle"; + uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount; + bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount); + break; + } + default: + // Not supported + ASSERT(0); + return false; + } + + psContext->indent++; + for (auto itr = m_StructDefinitions[""].m_Members.begin();;) + { + if (itr == m_StructDefinitions[""].m_Members.end()) + break; + + psContext->AddIndentation(); + bcatcstr(bodyglsl, itr->second.c_str()); + + itr++; + if (itr != m_StructDefinitions[""].m_Members.end()) + bcatcstr(bodyglsl, ",\n"); + } + + bcatcstr(bodyglsl, ")\n{\n"); + + if (popPragmaDiagnostic) + bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n"); + + if (psShader->eShaderType != COMPUTE_SHADER) + { + if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, GetOutputStructName().c_str()); + bcatcstr(bodyglsl, " output;\n"); + } + } + + if (psShader->eShaderType == HULL_SHADER) + { + if (hasPatchConstant) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n"); + } + + psContext->AddIndentation(); + bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % 
numPatchesInThreadGroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_VertexID = (mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x;\n"); + + psContext->AddIndentation(); + bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str()); + psContext->AddIndentation(); + bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str()); + + psContext->AddIndentation(); + std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf"; + bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str()); + } + + // There are cases when there are no control point phases and we have to do passthrough + if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (patchValid) {\n"); + psContext->indent++; + + // Passthrough control point phase, run the rest only once per patch + psContext->AddIndentation(); + bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str()); + + DoHullShaderPassthrough(psContext); + + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (!patchValid) {\n"); + psContext->indent++; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "return;\n"); + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + } + + if (psShader->eShaderType == HULL_SHADER) + { + for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) 
+ { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + uint32_t i; + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + psContext->currentPhase = ui32Phase; + + if (psPhase->earlyMain->slen > 1) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); #endif - bconcat(bodyglsl, psPhase->earlyMain); + bconcat(bodyglsl, psPhase->earlyMain); #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); #endif - } - - psContext->AddIndentation(); - bformata(bodyglsl, "// %s%d\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase); - if (psPhase->ui32InstanceCount > 1) - { - psContext->AddIndentation(); - bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", psPhase->ui32InstanceCount); - psContext->indent++; - } - else - { - if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (patchValid) {\n"); - psContext->indent++; - - psContext->AddIndentation(); - bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%svertexInput);\n", tessVertexFunctionArguments.c_str()); - } - else - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "{\n"); - psContext->indent++; - } - } - - if (psPhase->psInst.size() > 0) - { - //The minus one here is remove the return statement at end of phases. - //We don't want to translate that, we'll just end the function body. 
- ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); - for (i = 0; i < psPhase->psInst.size() - 1; ++i) - { - TranslateInstruction(&psPhase->psInst[i]); - } - } - - psContext->indent--; - psContext->AddIndentation(); - bformata(bodyglsl, "}\n"); - - if (psPhase->hasPostShaderCode) - { + } + + psContext->AddIndentation(); + bformata(bodyglsl, "// %s%d\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase); + if (psPhase->ui32InstanceCount > 1) + { + psContext->AddIndentation(); + bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", psPhase->ui32InstanceCount); + psContext->indent++; + } + else + { + if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (patchValid) {\n"); + psContext->indent++; + + psContext->AddIndentation(); + bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str()); + } + else + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "{\n"); + psContext->indent++; + } + } + + if (psPhase->psInst.size() > 0) + { + //The minus one here is remove the return statement at end of phases. + //We don't want to translate that, we'll just end the function body. 
+ ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); + for (i = 0; i < psPhase->psInst.size() - 1; ++i) + { + TranslateInstruction(&psPhase->psInst[i]); + } + } + + psContext->indent--; + psContext->AddIndentation(); + bformata(bodyglsl, "}\n"); + + if (psPhase->hasPostShaderCode) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Post shader code ---\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Post shader code ---\n"); #endif - bconcat(bodyglsl, psPhase->postShaderCode); + bconcat(bodyglsl, psPhase->postShaderCode); #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End post shader code ---\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End post shader code ---\n"); #endif - } - - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - { - // We're done printing control point phase, run the rest only once per patch - psContext->AddIndentation(); - bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (!patchValid) {\n"); - psContext->indent++; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "return;\n"); - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "}\n"); - } - } - } - - if (hasControlPoint) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "controlPoints[internalControlPointID] = output;\n"); - } - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n"); - - if (hasPatchConstant) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n"); - } - - if(psContext->psDependencies) - { - //Save partitioning and primitive type for use by domain shader. 
- psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; - psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; - psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup; - psContext->psDependencies->hasControlPoint = hasControlPoint; - psContext->psDependencies->hasPatchConstant = hasPatchConstant; - } - } - else - { - if (psContext->psShader->asPhases[0].earlyMain->slen > 1) - { + } + + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + { + // We're done printing control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (!patchValid) {\n"); + psContext->indent++; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "return;\n"); + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + } + } + } + + if (hasControlPoint) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "controlPoints[mtl_VertexID] = output;\n"); + } + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n"); + + if (hasPatchConstant) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n"); + } + + if (psContext->psDependencies) + { + //Save partitioning and primitive type for use by domain shader. 
+ psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; + psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; + psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup; + psContext->psDependencies->hasControlPoint = hasControlPoint; + psContext->psDependencies->hasPatchConstant = hasPatchConstant; + } + } + else + { + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); #endif - bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); + bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); #ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); #endif - } - - for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) - { - TranslateInstruction(&psShader->asPhases[0].psInst[i]); - } - } - - psContext->indent--; - - bcatcstr(bodyglsl, "}\n"); - - psContext->currentGLSLString = &glsl; - - if(psShader->eShaderType == HULL_SHADER && psContext->psDependencies) - { - psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount()); - } - - if(psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) - { - int mtlTessellationPartitionMode = -1; - int mtlWinding = -1; - - switch (psContext->psDependencies->eTessPartitioning) - { - case TESSELLATOR_PARTITIONING_INTEGER: - mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger - break; - case TESSELLATOR_PARTITIONING_POW2: - mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2 - break; - case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd - 
break; - case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven - break; - case TESSELLATOR_PARTITIONING_UNDEFINED: - default: - ASSERT(0); - break; - } - - switch (psContext->psDependencies->eTessOutPrim) - { - case TESSELLATOR_OUTPUT_TRIANGLE_CW: - mtlWinding = 0; // MTLWindingClockwise - break; - case TESSELLATOR_OUTPUT_TRIANGLE_CCW: - mtlWinding = 1; // MTLWindingCounterClockwise - break; - case TESSELLATOR_OUTPUT_POINT: - psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true); - break; - case TESSELLATOR_OUTPUT_LINE: - psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true); - break; - case TESSELLATOR_OUTPUT_UNDEFINED: - default: - ASSERT(0); - break; - } - - psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t) psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup); - } - - bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str()); - - // Print out extra functions we generated - std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p) - { - bcatcstr(glsl, p.second.c_str()); - bcatcstr(glsl, "\n"); - }); - - // And then the actual function body - bconcat(glsl, bodyglsl); - bdestroy(bodyglsl); - - return true; + } + + for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) + { + TranslateInstruction(&psShader->asPhases[0].psInst[i]); + } + } + + psContext->indent--; + + bcatcstr(bodyglsl, "}\n"); + + psContext->currentGLSLString = &glsl; + + if (psShader->eShaderType == HULL_SHADER && psContext->psDependencies) + { + psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount()); + } + + if (psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) + { + int 
mtlTessellationPartitionMode = -1; + int mtlWinding = -1; + + switch (psContext->psDependencies->eTessPartitioning) + { + case TESSELLATOR_PARTITIONING_INTEGER: + mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger + break; + case TESSELLATOR_PARTITIONING_POW2: + mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2 + break; + case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd + break; + case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven + break; + case TESSELLATOR_PARTITIONING_UNDEFINED: + default: + ASSERT(0); + break; + } + + switch (psContext->psDependencies->eTessOutPrim) + { + case TESSELLATOR_OUTPUT_TRIANGLE_CW: + mtlWinding = 0; // MTLWindingClockwise + break; + case TESSELLATOR_OUTPUT_TRIANGLE_CCW: + mtlWinding = 1; // MTLWindingCounterClockwise + break; + case TESSELLATOR_OUTPUT_POINT: + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true); + break; + case TESSELLATOR_OUTPUT_LINE: + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true); + break; + case TESSELLATOR_OUTPUT_UNDEFINED: + default: + ASSERT(0); + break; + } + + psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t)psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup); + } + + bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str()); + + // Print out extra functions we generated + std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p) + { + bcatcstr(glsl, p.second.c_str()); + bcatcstr(glsl, "\n"); + }); + + // And then the actual function body + bconcat(glsl, bodyglsl); + bdestroy(bodyglsl); + + return true; } void ToMetal::DeclareExtraFunction(const 
std::string &name, const std::string &body) { - if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) - return; - m_FunctionDefinitions.insert(std::make_pair(name, body)); + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return; + m_FunctionDefinitions.insert(std::make_pair(name, body)); } - std::string ToMetal::GetOutputStructName() const { - switch(psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - return "Mtl_VertexOut"; - case PIXEL_SHADER: - return "Mtl_FragmentOut"; - case HULL_SHADER: - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE || - psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE) - return "Mtl_PatchConstant"; - return "Mtl_ControlPoint"; - case DOMAIN_SHADER: - return "Mtl_VertexOutPostTess"; - default: - ASSERT(0); - return ""; - } + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + return "Mtl_VertexOut"; + case PIXEL_SHADER: + return "Mtl_FragmentOut"; + case HULL_SHADER: + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE) + return "Mtl_PatchConstant"; + return "Mtl_ControlPoint"; + case DOMAIN_SHADER: + return "Mtl_VertexOutPostTess"; + default: + ASSERT(0); + return ""; + } } std::string ToMetal::GetInputStructName() const { - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - return "Mtl_VertexIn"; - case PIXEL_SHADER: - return "Mtl_FragmentIn"; - case COMPUTE_SHADER: - return "Mtl_KernelIn"; - case HULL_SHADER: - return "Mtl_HullIn"; - case DOMAIN_SHADER: - return "Mtl_VertexInPostTess"; - default: - ASSERT(0); - return ""; - } + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + return "Mtl_VertexIn"; + case PIXEL_SHADER: + return "Mtl_FragmentIn"; + case COMPUTE_SHADER: + return "Mtl_KernelIn"; + case HULL_SHADER: + return "Mtl_HullIn"; + case 
DOMAIN_SHADER: + return "Mtl_VertexInPostTess"; + default: + ASSERT(0); + return ""; + } } std::string ToMetal::GetCBName(const std::string& cbName) const { - std::string output = cbName; - if (cbName[0] == '$') - { - // "$Globals" should have different names in different shaders so that CbKey can discretely identify a CB. - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - case HULL_SHADER: - case DOMAIN_SHADER: - output[0] = 'V'; - break; - case PIXEL_SHADER: - output[0] = 'F'; - break; - case COMPUTE_SHADER: - output = cbName.substr(1); - break; - default: - ASSERT(0); - break; - } - } - return output; + std::string output = cbName; + if (cbName[0] == '$') + { + // "$Globals" should have different names in different shaders so that CbKey can discretely identify a CB. + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + case HULL_SHADER: + case DOMAIN_SHADER: + output[0] = 'V'; + break; + case PIXEL_SHADER: + output[0] = 'F'; + break; + case COMPUTE_SHADER: + output = cbName.substr(1); + break; + default: + ASSERT(0); + break; + } + } + return output; } void ToMetal::SetIOPrefixes() { - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - case HULL_SHADER: - case DOMAIN_SHADER: - psContext->inputPrefix = "input."; - psContext->outputPrefix = "output."; - break; - - case PIXEL_SHADER: - psContext->inputPrefix = "input."; - psContext->outputPrefix = "output."; - break; - - case COMPUTE_SHADER: - psContext->inputPrefix = ""; - psContext->outputPrefix = ""; - break; - default: - ASSERT(0); - break; - } + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + case HULL_SHADER: + case DOMAIN_SHADER: + psContext->inputPrefix = "input."; + psContext->outputPrefix = "output."; + break; + + case PIXEL_SHADER: + psContext->inputPrefix = "input."; + psContext->outputPrefix = "output."; + break; + + case COMPUTE_SHADER: + psContext->inputPrefix = ""; + psContext->outputPrefix = ""; + break; + default: + 
ASSERT(0); + break; + } } void ToMetal::ClampPartialPrecisions() { - HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL, - [](std::vector::iterator &i, Operand *o, uint32_t flags) - { - if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8) - o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16; - }); + HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL, + [](std::vector::iterator &i, Operand *o, uint32_t flags) + { + if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8) + o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16; + }); } void ToMetal::ReserveUAVBindingSlots(ShaderPhase *phase) { - for (uint32_t p = 0; p < phase->psDecl.size(); ++p) - { - uint32_t regNo = phase->psDecl[p].asOperands[0].ui32RegisterNumber; - - if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || - phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) - { - m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); - } - else if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) - { - // Typed buffers are atm faked using structured buffers -> bind in buffer space - if (phase->psDecl[p].value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) - m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); - else - m_TextureSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::UAV); - } - } + for (uint32_t p = 0; p < phase->psDecl.size(); ++p) + { + uint32_t regNo = phase->psDecl[p].asOperands[0].ui32RegisterNumber; + + if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || + phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) + { + m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); + } + else if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + // Typed buffers are atm 
faked using structured buffers -> bind in buffer space + if (phase->psDecl[p].value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) + m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); + else + m_TextureSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::UAV); + } + } } diff --git a/src/toMetalDeclaration.cpp b/src/toMetalDeclaration.cpp index bce240c..1087a5d 100644 --- a/src/toMetalDeclaration.cpp +++ b/src/toMetalDeclaration.cpp @@ -1,4 +1,3 @@ - #include "internal_includes/toMetal.h" #include "internal_includes/debug.h" #include "internal_includes/HLSLccToolkit.h" @@ -7,6 +6,8 @@ #include #include +using namespace HLSLcc; + #ifndef fpcheck #ifdef _MSC_VER #define fpcheck(x) (_isnan(x) || !_finite(x)) @@ -18,1052 +19,1044 @@ bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) { - if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_TessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 3); - std::ostringstream oss; - oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - - if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_InsideTessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 1); - std::ostringstream oss; - oss << "tessFactor.insideTessellationFactor"; - if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) - oss << "[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - - if (sig && 
sig->semanticName == "SV_InstanceID") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - } - - if (sig && ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && - ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) - { - result = "mtl_Position"; - return true; - } - - if (sig) - { - switch (sig->eSystemValueType) - { - case NAME_POSITION: - if (psContext->psShader->eShaderType == PIXEL_SHADER) - result = "mtl_FragCoord"; - else - result = "mtl_Position"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case NAME_RENDER_TARGET_ARRAY_INDEX: - result = "mtl_Layer"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_CLIP_DISTANCE: - { - // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes - char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, sig->ui32SemanticIndex); - result = tmpName; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - case NAME_VIEWPORT_ARRAY_INDEX: - result = "mtl_ViewPortIndex"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_VERTEX_ID: - result = "mtl_VertexID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_INSTANCE_ID: - result = "mtl_InstanceID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case NAME_IS_FRONT_FACE: - result = "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_SAMPLE_INDEX: - result = "mtl_SampleID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - - default: - break; - } - } + if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || - psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) + if (psContext->psShader->eShaderType == HULL_SHADER && sig && sig->semanticName == "SV_InsideTessFactor") { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + std::ostringstream oss; + oss << "tessFactor.insideTessellationFactor"; + if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) + oss << "[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } - std::ostringstream oss; - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); + if (sig && sig->semanticName == "SV_InstanceID") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + + if (sig && ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && + ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) + { + result = "mtl_Position"; 
+ return true; + } + + if (sig) + { + switch (sig->eSystemValueType) + { + case NAME_POSITION: + if (psContext->psShader->eShaderType == PIXEL_SHADER) + result = "hlslcc_FragCoord"; + else + result = "mtl_Position"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_RENDER_TARGET_ARRAY_INDEX: + result = "mtl_Layer"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_CLIP_DISTANCE: + { + // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes + char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, sig->ui32SemanticIndex); + result = tmpName; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + case NAME_VIEWPORT_ARRAY_INDEX: + result = "mtl_ViewPortIndex"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_VERTEX_ID: + result = "mtl_VertexID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_INSTANCE_ID: + result = "mtl_InstanceID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_IS_FRONT_FACE: + result = "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_SAMPLE_INDEX: + result = "mtl_SampleID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + + default: + break; + } + } + + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); return true; } - switch (psOperand->eType) - { - case OPERAND_TYPE_INPUT_COVERAGE_MASK: - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - result = "mtl_CoverageMask"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case OPERAND_TYPE_INPUT_THREAD_ID: - result = "mtl_ThreadID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - result = "mtl_ThreadGroupID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - result = "mtl_ThreadIDInGroup"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - result = "mtl_ThreadIndexInThreadGroup"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - result = "mtl_TessCoord"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case OPERAND_TYPE_OUTPUT_DEPTH: - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - result = "mtl_Depth"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - 
return true; - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_INPUT: - { - std::ostringstream oss; - ASSERT(sig != NULL); - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL) - *pui32IgnoreSwizzle = 1; - return true; - } - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - { - std::ostringstream oss; - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - } - case OPERAND_TYPE_INPUT_CONTROL_POINT: - { - std::ostringstream oss; - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - break; - } - default: + switch (psOperand->eType) + { + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + result = "mtl_CoverageMask"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID: + result = "mtl_ThreadID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + result = "mtl_ThreadGroupID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + result = "mtl_ThreadIDInGroup"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + result = "mtl_ThreadIndexInThreadGroup"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + result = "mtl_TessCoord"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case 
OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + result = "mtl_Depth"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + std::ostringstream oss; + ASSERT(sig != NULL); + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL) + *pui32IgnoreSwizzle = 1; + return true; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + break; + } + default: ASSERT(0); break; - } - + } - return false; + return false; } void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) { - const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - - switch (eSpecialName) - { - case NAME_POSITION: - ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); - break; - case NAME_CLIP_DISTANCE: - ASSERT(0); // Should never be an input - break; - case NAME_VIEWPORT_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); - break; - case NAME_INSTANCE_ID: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint 
mtl_InstanceID [[ instance_id ]]")); - break; - case NAME_IS_FRONT_FACE: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]")); - break; - case NAME_SAMPLE_INDEX: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_SampleID", "uint mtl_SampleID [[ sample_id ]]")); - break; - case NAME_VERTEX_ID: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]")); - break; - case NAME_PRIMITIVE_ID: - // Not on Metal - ASSERT(0); - break; - default: - m_StructDefinitions[""].m_Members.push_back(std::make_pair(psDecl->asOperands[0].specialName, std::string("float4 ").append(psDecl->asOperands[0].specialName))); - ASSERT(0); // Catch this to see what's happening - break; - } + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + + Shader* psShader = psContext->psShader; + const Operand* psOperand = &psDecl->asOperands[0]; + const int regSpace = psOperand->GetRegisterSpace(psContext); + ASSERT(regSpace == 0); + + // we need to at least mark if they are scalars or not (as we might need to use vector ctor) + if (psOperand->GetNumInputElements(psContext) == 1) + psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] |= (int)psOperand->ui32CompMask; + + switch (eSpecialName) + { + case NAME_POSITION: + ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); + bcatcstr(GetEarlyMain(psContext), "float4 hlslcc_FragCoord = float4(mtl_FragCoord.xyz, 1.0/mtl_FragCoord.w);\n"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + // Only supported on a Mac + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); + break; + case NAME_CLIP_DISTANCE: + ASSERT(0); // Should never be an input + break; + case NAME_VIEWPORT_ARRAY_INDEX: + // Only 
supported on a Mac + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); + break; + case NAME_INSTANCE_ID: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint mtl_InstanceID [[ instance_id ]]")); + break; + case NAME_IS_FRONT_FACE: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]")); + break; + case NAME_SAMPLE_INDEX: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_SampleID", "uint mtl_SampleID [[ sample_id ]]")); + break; + case NAME_VERTEX_ID: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]")); + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + default: + m_StructDefinitions[""].m_Members.push_back(std::make_pair(psDecl->asOperands[0].specialName, std::string("float4 ").append(psDecl->asOperands[0].specialName))); + ASSERT(0); // Catch this to see what's happening + break; + } } void ToMetal::DeclareClipPlanes(const Declaration* decl, unsigned declCount) { - unsigned planeCount = 0; - for(unsigned i = 0, n = declCount ; i < n ; ++i) - { - const Operand* operand = &decl[i].asOperands[0]; - if(operand->eSpecialName == NAME_CLIP_DISTANCE) - planeCount += operand->GetMaxComponent(); - } - if(planeCount == 0) return; - - std::ostringstream oss; oss << "float mtl_ClipDistance [[ clip_distance ]]"; - if(planeCount > 1) oss << "[" << planeCount << "]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(std::string("mtl_ClipDistance"), oss.str())); - - Shader* shader = psContext->psShader; - - unsigned compCount = 1; - const ShaderInfo::InOutSignature* psFirstClipSignature; - if(shader->sInfo.GetOutputSignatureFromSystemValue(NAME_CLIP_DISTANCE, 0, &psFirstClipSignature)) - { - if(psFirstClipSignature->ui32Mask & (1 << 3)) compCount = 4; - else 
if(psFirstClipSignature->ui32Mask & (1 << 2)) compCount = 3; - else if(psFirstClipSignature->ui32Mask & (1 << 1)) compCount = 2; - } - - ShaderPhase* phase = &shader->asPhases[psContext->currentPhase]; - for(unsigned i = 0, n = declCount ; i < n ; ++i) - { - const Operand* operand = &decl[i].asOperands[0]; - if(operand->eSpecialName != NAME_CLIP_DISTANCE) continue; - - const ShaderInfo::InOutSignature* signature = 0; - shader->sInfo.GetOutputSignatureFromRegister(operand->ui32RegisterNumber, operand->ui32CompMask, 0, &signature); - const int semanticIndex = signature->ui32SemanticIndex; - - bformata(phase->earlyMain, " float4 phase%d_ClipDistance%d;\n", psContext->currentPhase, signature->ui32SemanticIndex); - - const char* swizzleStr[] = { "x", "y", "z", "w" }; - phase->hasPostShaderCode = 1; - if(planeCount > 1) - { - for(int i = 0 ; i < compCount ; ++i) - { - bformata(phase->postShaderCode, " %s.mtl_ClipDistance[%d] = phase%d_ClipDistance%d.%s;\n", - "output", semanticIndex*compCount + i, psContext->currentPhase, semanticIndex, swizzleStr[i] - ); - } - } - else - { - bformata(phase->postShaderCode, " %s.mtl_ClipDistance = phase%d_ClipDistance%d.x;\n", "output", psContext->currentPhase, semanticIndex); - } - } + unsigned planeCount = 0; + for (unsigned i = 0, n = declCount; i < n; ++i) + { + const Operand* operand = &decl[i].asOperands[0]; + if (operand->eSpecialName == NAME_CLIP_DISTANCE) + planeCount += operand->GetMaxComponent(); + } + if (planeCount == 0) return; + + std::ostringstream oss; oss << "float mtl_ClipDistance [[ clip_distance ]]"; + if (planeCount > 1) oss << "[" << planeCount << "]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(std::string("mtl_ClipDistance"), oss.str())); + + Shader* shader = psContext->psShader; + + unsigned compCount = 1; + const ShaderInfo::InOutSignature* psFirstClipSignature; + if (shader->sInfo.GetOutputSignatureFromSystemValue(NAME_CLIP_DISTANCE, 0, &psFirstClipSignature)) + { + if 
(psFirstClipSignature->ui32Mask & (1 << 3)) compCount = 4; + else if (psFirstClipSignature->ui32Mask & (1 << 2)) compCount = 3; + else if (psFirstClipSignature->ui32Mask & (1 << 1)) compCount = 2; + } + + for (unsigned i = 0, n = declCount; i < n; ++i) + { + const Operand* operand = &decl[i].asOperands[0]; + if (operand->eSpecialName != NAME_CLIP_DISTANCE) continue; + + const ShaderInfo::InOutSignature* signature = 0; + shader->sInfo.GetOutputSignatureFromRegister(operand->ui32RegisterNumber, operand->ui32CompMask, 0, &signature); + const int semanticIndex = signature->ui32SemanticIndex; + + bformata(GetEarlyMain(psContext), "float4 phase%d_ClipDistance%d;\n", psContext->currentPhase, signature->ui32SemanticIndex); + + const char* swizzleStr[] = { "x", "y", "z", "w" }; + if (planeCount > 1) + { + for (int i = 0; i < compCount; ++i) + { + bformata(GetPostShaderCode(psContext), "%s.mtl_ClipDistance[%d] = phase%d_ClipDistance%d.%s;\n", "output", semanticIndex * compCount + i, psContext->currentPhase, semanticIndex, swizzleStr[i]); + } + } + else + { + bformata(GetPostShaderCode(psContext), "%s.mtl_ClipDistance = phase%d_ClipDistance%d.x;\n", "output", psContext->currentPhase, semanticIndex); + } + } } + void ToMetal::GenerateTexturesReflection(HLSLccReflection* refl) { - for(unsigned i = 0, n = m_Textures.size() ; i < n ; ++i) - { - const std::string samplerName1 = m_Textures[i].name, samplerName2 = "sampler"+m_Textures[i].name; - for(unsigned j = 0, m = m_Samplers.size() ; j < m ; ++j) - { - if(m_Samplers[j].name == samplerName1 || m_Samplers[j].name == samplerName2) - { - m_Textures[i].samplerBind = m_Samplers[j].slot; - break; - } - } - } - - for(unsigned i = 0, n = m_Textures.size() ; i < n ; ++i) - refl->OnTextureBinding(m_Textures[i].name, m_Textures[i].textureBind, m_Textures[i].samplerBind, m_Textures[i].isMultisampled, m_Textures[i].dim, m_Textures[i].uav); + for (unsigned i = 0, n = m_Textures.size(); i < n; ++i) + { + // Match 
CheckSamplerAndTextureNameMatch behavior + const std::string samplerName1 = m_Textures[i].name, samplerName2 = "sampler" + m_Textures[i].name, samplerName3 = "sampler_" + m_Textures[i].name; + for (unsigned j = 0, m = m_Samplers.size(); j < m; ++j) + { + if (m_Samplers[j].name == samplerName1 || m_Samplers[j].name == samplerName2 || m_Samplers[j].name == samplerName3) + { + m_Textures[i].samplerBind = m_Samplers[j].slot; + break; + } + } + } + + for (unsigned i = 0, n = m_Textures.size(); i < n; ++i) + refl->OnTextureBinding(m_Textures[i].name, m_Textures[i].textureBind, m_Textures[i].samplerBind, m_Textures[i].isMultisampled, m_Textures[i].dim, m_Textures[i].uav); } void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) { - std::string out = GetOutputStructName(); - - switch (psDecl->asOperands[0].eSpecialName) - { - case NAME_POSITION: - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); - break; - case NAME_CLIP_DISTANCE: - // it will be done separately in DeclareClipPlanes - break; - case NAME_VIEWPORT_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); - break; - case NAME_VERTEX_ID: - ASSERT(0); //VertexID is not an output - break; - case NAME_PRIMITIVE_ID: - // Not on Metal - ASSERT(0); - break; - case NAME_INSTANCE_ID: - ASSERT(0); //InstanceID is not an output - break; - case NAME_IS_FRONT_FACE: - ASSERT(0); //FrontFacing is not an output - break; - - //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). 
- case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - - //For the triangular domain, there are 4 factors (3 sides, 1 inner) - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - - //For the isoline domain, there are 2 factors (detail and density). - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - { - // Handled separately - break; - } - default: - // This might be SV_Position (because d3dcompiler is weird). Get signature and check + std::string out = GetOutputStructName(); + + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + // Only supported on a Mac + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); + break; + case NAME_CLIP_DISTANCE: + // it will be done separately in DeclareClipPlanes + break; + case NAME_VIEWPORT_ARRAY_INDEX: + // Only supported on a Mac + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); + break; + case NAME_VERTEX_ID: + ASSERT(0); //VertexID is not an output + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + case NAME_INSTANCE_ID: + ASSERT(0); //InstanceID is not an output + break; + case NAME_IS_FRONT_FACE: + ASSERT(0); //FrontFacing is not an output + break; + + //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). 
+ case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + + //For the triangular domain, there are 4 factors (3 sides, 1 inner) + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + + //For the isoline domain, there are 2 factors (detail and density). + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + // Handled separately + break; + } + default: + // This might be SV_Position (because d3dcompiler is weird). Get signature and check const ShaderInfo::InOutSignature *sig = NULL; - psContext->psShader->sInfo.GetOutputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].GetAccessMask(), 0, &sig); - ASSERT(sig != NULL); - if (sig->eSystemValueType == NAME_POSITION && sig->ui32SemanticIndex == 0) - { - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); - break; - } - - ASSERT(0); // Wut - break; - } + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if (sig->eSystemValueType == NAME_POSITION && sig->ui32SemanticIndex == 0) + { + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); + break; + } + + ASSERT(0); // Wut + break; + } } static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COMPONENT_TYPE eType, int numComponents) { - SHADER_VARIABLE_TYPE t = SVT_FLOAT; - switch (eType) - { - case INOUT_COMPONENT_FLOAT32: - t = SVT_FLOAT; - break; - case INOUT_COMPONENT_UINT32: - t = SVT_UINT; - 
break; - case INOUT_COMPONENT_SINT32: - t = SVT_INT; - break; - default: - ASSERT(0); - break; - } - // Can be overridden by precision - switch (ePrec) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - - case OPERAND_MIN_PRECISION_FLOAT_16: - ASSERT(eType == INOUT_COMPONENT_FLOAT32); - t = SVT_FLOAT16; - break; - - case OPERAND_MIN_PRECISION_FLOAT_2_8: - ASSERT(eType == INOUT_COMPONENT_FLOAT32); - t = SVT_FLOAT10; - break; - - case OPERAND_MIN_PRECISION_SINT_16: - ASSERT(eType == INOUT_COMPONENT_SINT32); - t = SVT_INT16; - break; - case OPERAND_MIN_PRECISION_UINT_16: - ASSERT(eType == INOUT_COMPONENT_UINT32); - t = SVT_UINT16; - break; - } - return HLSLcc::GetConstructorForTypeMetal(t, numComponents); + SHADER_VARIABLE_TYPE t = SVT_FLOAT; + switch (eType) + { + case INOUT_COMPONENT_FLOAT32: + t = SVT_FLOAT; + break; + case INOUT_COMPONENT_UINT32: + t = SVT_UINT; + break; + case INOUT_COMPONENT_SINT32: + t = SVT_INT; + break; + default: + ASSERT(0); + break; + } + // Can be overridden by precision + switch (ePrec) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + + case OPERAND_MIN_PRECISION_FLOAT_16: + ASSERT(eType == INOUT_COMPONENT_FLOAT32); + t = SVT_FLOAT16; + break; + + case OPERAND_MIN_PRECISION_FLOAT_2_8: + ASSERT(eType == INOUT_COMPONENT_FLOAT32); + t = SVT_FLOAT10; + break; + + case OPERAND_MIN_PRECISION_SINT_16: + ASSERT(eType == INOUT_COMPONENT_SINT32); + t = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + ASSERT(eType == INOUT_COMPONENT_UINT32); + t = SVT_UINT16; + break; + } + return HLSLcc::GetConstructorForTypeMetal(t, numComponents); } void ToMetal::DeclareHullShaderPassthrough() { - uint32_t i; + uint32_t i; - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = 
&psContext->psShader->sInfo.psInputSignatures[i]; - std::string name; - { - std::ostringstream oss; - oss << psSig->semanticName << psSig->ui32SemanticIndex; - name = oss.str(); - } + std::string name; + { + std::ostringstream oss; + oss << psSig->semanticName << psSig->ui32SemanticIndex; + name = oss.str(); + } - uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents); + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents); - std::ostringstream oss; - oss << typeName << " " << name; - oss << " [[ user(" << name << ") ]]"; + std::ostringstream oss; + oss << typeName << " " << name; + oss << " [[ user(" << name << ") ]]"; - std::string declString; - declString = oss.str(); + std::string declString; + declString = oss.str(); - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); - std::string out = GetOutputStructName(); - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, declString)); + std::string out = GetOutputStructName(); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, declString)); - // For preserving data layout, declare output struct as domain shader input, too - oss.str(""); - out += "In"; + // For preserving data layout, declare output struct as domain shader input, too + oss.str(""); + out += "In"; - oss << typeName << " " << name; - // VERTEX_SHADER hardcoded on purpose - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); - oss << " [[ " << "attribute(" << loc << ")" << " ]] "; + oss << typeName << " " << name; + // VERTEX_SHADER hardcoded on purpose + uint32_t loc = 
psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + oss << " [[ " << "attribute(" << loc << ")" << " ]] "; - psContext->m_Reflection.OnInputBinding(name, loc); - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); - } + psContext->m_Reflection.OnInputBinding(name, loc); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + } } void ToMetal::HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName) { - const Operand *psOperand = &psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if this is indexed? 
- ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; - int comp = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); - - psContext->AddIndentation(); - bformata(psPhase->earlyMain, "%s phase%d_Output%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - psPhase->hasPostShaderCode = 1; - psContext->currentGLSLString = &psPhase->postShaderCode; - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - uint32_t mask, i; - psSig = NULL; - if (regSpace == 0) - psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - // The register isn't necessarily packed full. Continue with the next component. 
- if (psSig == NULL) - { - comp++; - continue; - } - - numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - mask = psSig->ui32Mask; - - ((Operand *)psOperand)->ui32CompMask = 1 << comp; - psContext->AddIndentation(); - bcatcstr(psPhase->postShaderCode, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); - - bcatcstr(psPhase->postShaderCode, " = "); - - if (psSig->eComponentType == INOUT_COMPONENT_SINT32) - { - bformata(psPhase->postShaderCode, "as_type("); - hasCast = 1; - } - else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) - { - bformata(psPhase->postShaderCode, "as_type("); - hasCast = 1; - } - bformata(psPhase->postShaderCode, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - // Print out mask - for (i = 0; i < 4; i++) - { - if ((mask & (1 << i)) == 0) - continue; - - bformata(psPhase->postShaderCode, "%c", "xyzw"[i]); - } - - if (hasCast) - bcatcstr(psPhase->postShaderCode, ")"); - comp += numComps; - bcatcstr(psPhase->postShaderCode, ";\n"); - } - - psContext->currentGLSLString = &psContext->glsl; - - ((Operand *)psOperand)->ui32CompMask = origMask; - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } + const Operand *psOperand = &psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this 
is indexed? + int comp = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); + + bformata(GetEarlyMain(psContext), "%s phase%d_Output%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + uint32_t mask, i; + psSig = NULL; + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + // The register isn't necessarily packed full. Continue with the next component. + if (psSig == NULL) + { + comp++; + continue; + } + + numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + mask = psSig->ui32Mask; + + ((Operand *)psOperand)->ui32CompMask = 1 << comp; + bstring str = GetPostShaderCode(psContext); + bcatcstr(str, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); + bcatcstr(str, " = "); + + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(str, "as_type("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(str, "as_type("); + hasCast = 1; + } + bformata(str, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Print out mask + for (i = 0; i < 4; i++) + { + if ((mask & (1 << i)) == 0) + continue; + + bformata(str, "%c", "xyzw"[i]); + } + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + bcatcstr(str, ";\n"); + } + + ((Operand *)psOperand)->ui32CompMask = origMask; + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + 
psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } } void ToMetal::HandleInputRedirect(const Declaration *psDecl, const std::string &typeName) { - Operand *psOperand = (Operand *)&psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0) - { - if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - needsRedirect = 1; - } - else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if this is indexed? - ShaderPhase *psPhase = &psShader->asPhases[psContext->currentPhase]; - int needsLooping = 0; - int i = 0; - uint32_t origArraySize = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); - - psContext->currentGLSLString = &psPhase->earlyMain; - psContext->AddIndentation(); - - bcatcstr(psPhase->earlyMain, " "); - // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) - if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) - { - // The count is actually stored in psOperand->aui32ArraySizes[0] - origArraySize = psOperand->aui32ArraySizes[0]; - // bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); - bformata(psPhase->earlyMain, "%s phase%d_Input%d_%d[%d];\n", typeName.c_str(), psContext->currentPhase, regSpace, 
psOperand->ui32RegisterNumber, origArraySize); - needsLooping = 1; - i = origArraySize - 1; - } - else - // bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - bformata(psPhase->earlyMain, "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. - do - { - int comp = 0; - bcatcstr(psPhase->earlyMain, " "); - if (needsLooping) - bformata(psPhase->earlyMain, "phase%d_Input%d_%d[%d] = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i, typeName.c_str()); - else - bformata(psPhase->earlyMain, "phase%d_Input%d_%d = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, typeName.c_str()); - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - int hasSig = 0; - if (regSpace == 0) - hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - else - hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - if (hasSig) - { - numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - if (psSig->eComponentType != INOUT_COMPONENT_FLOAT32) - { - if (numComps > 1) - bformata(psPhase->earlyMain, "as_type(", numComps); - else - bformata(psPhase->earlyMain, "as_type("); - hasCast = 1; - } - - // Override the array size of the operand so TranslateOperand call below prints the correct index - if (needsLooping) - psOperand->aui32ArraySizes[0] = i; - - // And the component mask - psOperand->ui32CompMask = 1 << comp; - - bformata(psPhase->earlyMain, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); - - // Restore the original array size value and mask - psOperand->ui32CompMask = origMask; - if (needsLooping) - psOperand->aui32ArraySizes[0] = origArraySize; - - if (hasCast) - 
bcatcstr(psPhase->earlyMain, ")"); - comp += numComps; - } - else // no signature found -> fill with zero - { - bcatcstr(psPhase->earlyMain, "0"); - comp++; - } - - if (comp < 4) - bcatcstr(psPhase->earlyMain, ", "); - } - bcatcstr(psPhase->earlyMain, ");\n"); - - } while ((--i) >= 0); - - psContext->currentGLSLString = &psContext->glsl; - - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } + Operand *psOperand = (Operand *)&psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0) + { + if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + needsRedirect = 1; + } + else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? 
+ int needsLooping = 0; + int i = 0; + uint32_t origArraySize = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); + + ++psContext->indent; + + // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) + if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) + { + // The count is actually stored in psOperand->aui32ArraySizes[0] + origArraySize = psOperand->aui32ArraySizes[0]; + // bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + bformata(GetEarlyMain(psContext), "%s phase%d_Input%d_%d[%d];\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + needsLooping = 1; + i = origArraySize - 1; + } + else + // bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + bformata(GetEarlyMain(psContext), "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. 
+ do + { + int comp = 0; + bstring str = GetEarlyMain(psContext); + if (needsLooping) + bformata(str, "phase%d_Input%d_%d[%d] = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i, typeName.c_str()); + else + bformata(str, "phase%d_Input%d_%d = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, typeName.c_str()); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + int hasSig = 0; + if (regSpace == 0) + hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + else + hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + if (hasSig) + { + numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + if (psSig->eComponentType != INOUT_COMPONENT_FLOAT32) + { + if (numComps > 1) + bformata(str, "as_type(", numComps); + else + bformata(str, "as_type("); + hasCast = 1; + } + + // Override the array size of the operand so TranslateOperand call below prints the correct index + if (needsLooping) + psOperand->aui32ArraySizes[0] = i; + + // And the component mask + psOperand->ui32CompMask = 1 << comp; + + bformata(str, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); + + // Restore the original array size value and mask + psOperand->ui32CompMask = origMask; + if (needsLooping) + psOperand->aui32ArraySizes[0] = origArraySize; + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + } + else // no signature found -> fill with zero + { + bcatcstr(str, "0"); + comp++; + } + + if (comp < 4) + bcatcstr(str, ", "); + } + bcatcstr(str, ");\n"); + } + while ((--i) >= 0); + + --psContext->indent; + + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } } static std::string 
TranslateResourceDeclaration(HLSLCrossCompilerContext* psContext, - const Declaration *psDecl, const std::string& textureName, - bool isDepthSampler, bool isUAV) + const Declaration *psDecl, const std::string& textureName, + bool isDepthSampler, bool isUAV) { - std::ostringstream oss; - const ResourceBinding* psBinding = 0; - const RESOURCE_DIMENSION eDimension = psDecl->value.eResourceDimension; - const uint32_t ui32RegisterNumber = psDecl->asOperands[0].ui32RegisterNumber; - REFLECT_RESOURCE_PRECISION ePrec = REFLECT_RESOURCE_PRECISION_UNKNOWN; - RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; - std::string access = "sample"; - - if (isUAV) - { - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0) - { - access = "write"; - if (psContext->psShader->eShaderType != COMPUTE_SHADER) - psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of texture writes on non-compute shaders.", 0, false); - - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0) - { - access = "read_write"; - } - } - else - { - access = "read"; - eType = psDecl->sUAV.Type; - } - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32RegisterNumber, &psBinding); - if (found) - { - ePrec = psBinding->ePrecision; - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - // Figured out by reverse engineering bitcode. 
flags b00xx means float1, b01xx = float2, b10xx = float3 and b11xx = float4 - } - - } - else - { - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if (found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - ePrec = psBinding->ePrecision; - - // TODO: it might make sense to propagate float earlier (as hlslcc might declare other variables depending on sampler prec) - // metal supports ONLY float32 depth textures - if(isDepthSampler) - { - switch(eDimension) - { - case RESOURCE_DIMENSION_TEXTURE2D: case RESOURCE_DIMENSION_TEXTURE2DMS: case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - ePrec = REFLECT_RESOURCE_PRECISION_HIGHP, eType = RETURN_TYPE_FLOAT; break; - default: - break; - } - } - } - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - case RESOURCE_DIMENSION_TEXTURE2DMS: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - access = "read"; - default: - break; - } - } - - SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec); - std::string typeName = HLSLcc::GetConstructorForTypeMetal(svtType, 1); - - if ((textureName == "_CameraDepthTexture" || textureName == "_LastCameraDepthTexture") && svtType != SVT_FLOAT) - { - std::string msg = textureName + " should be float on Metal (use sampler2D or sampler2D_float). Incorrect type " - "can cause Metal validation failures or undefined results on some devices."; - psContext->m_Reflection.OnDiagnostics(msg, 0, false); - } - - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - oss << "texture1d<" << typeName << ", access::"<< access <<" >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - { - oss << "texture1d<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2D: - { - oss << (isDepthSampler ? 
"depth2d<" : "texture2d<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - oss << (isDepthSampler ? "depth2d_ms<" : "texture2d_ms<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE3D: - { - oss << "texture3d<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBE: - { - oss << (isDepthSampler ? "depthcube<" : "texturecube<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - oss << "texture1d_array<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - oss << (isDepthSampler ? "depth2d_array<" : "texture2d_array<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - // Not really supported in Metal but let's print it here anyway - oss << "texture2d_ms_array<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - oss << (isDepthSampler ? 
"depthcube_array<" : "texturecube_array<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - default: - ASSERT(0); - oss << "texture2d<" << typeName << ", access::" << access << " >"; - return oss.str(); - } + std::ostringstream oss; + const ResourceBinding* psBinding = 0; + const RESOURCE_DIMENSION eDimension = psDecl->value.eResourceDimension; + const uint32_t ui32RegisterNumber = psDecl->asOperands[0].ui32RegisterNumber; + REFLECT_RESOURCE_PRECISION ePrec = REFLECT_RESOURCE_PRECISION_UNKNOWN; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + std::string access = "sample"; + + if (isUAV) + { + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0) + { + access = "write"; + if (psContext->psShader->eShaderType != COMPUTE_SHADER) + psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of texture writes on non-compute shaders.", 0, false); + + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0) + { + access = "read_write"; + } + } + else + { + access = "read"; + eType = psDecl->sUAV.Type; + } + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32RegisterNumber, &psBinding); + if (found) + { + ePrec = psBinding->ePrecision; + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + // Figured out by reverse engineering bitcode. 
flags b00xx means float1, b01xx = float2, b10xx = float3 and b11xx = float4 + } + } + else + { + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + ePrec = psBinding->ePrecision; + + // TODO: it might make sense to propagate float earlier (as hlslcc might declare other variables depending on sampler prec) + // metal supports ONLY float32 depth textures + if (isDepthSampler) + { + switch (eDimension) + { + case RESOURCE_DIMENSION_TEXTURE2D: case RESOURCE_DIMENSION_TEXTURE2DMS: case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + ePrec = REFLECT_RESOURCE_PRECISION_HIGHP, eType = RETURN_TYPE_FLOAT; break; + default: + break; + } + } + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + access = "read"; + default: + break; + } + } + SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec); + std::string typeName = HLSLcc::GetConstructorForTypeMetal(svtType, 1); + + if ((textureName == "_CameraDepthTexture" || textureName == "_LastCameraDepthTexture") && svtType != SVT_FLOAT) + { + std::string msg = textureName + " should be float on Metal (use sampler2D or sampler2D_float). Incorrect type " + "can cause Metal validation failures or undefined results on some devices."; + psContext->m_Reflection.OnDiagnostics(msg, 0, false); + } + + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + oss << "texture1d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + oss << "texture1d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + oss << (isDepthSampler ? 
"depth2d<" : "texture2d<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + oss << (isDepthSampler ? "depth2d_ms<" : "texture2d_ms<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + oss << "texture3d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + oss << (isDepthSampler ? "depthcube<" : "texturecube<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + oss << "texture1d_array<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + oss << (isDepthSampler ? "depth2d_array<" : "texture2d_array<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + // Not really supported in Metal but let's print it here anyway + oss << "texture2d_ms_array<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + oss << (isDepthSampler ? 
"depthcube_array<" : "texturecube_array<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + default: + ASSERT(0); + oss << "texture2d<" << typeName << ", access::" << access << " >"; + return oss.str(); + } } static std::string GetInterpolationString(INTERPOLATION_MODE eMode) { - switch (eMode) - { - case INTERPOLATION_CONSTANT: - return " [[ flat ]]"; + switch (eMode) + { + case INTERPOLATION_CONSTANT: + return " [[ flat ]]"; - case INTERPOLATION_LINEAR: - return ""; + case INTERPOLATION_LINEAR: + return ""; - case INTERPOLATION_LINEAR_CENTROID: - return " [[ centroid_perspective ]]"; + case INTERPOLATION_LINEAR_CENTROID: + return " [[ centroid_perspective ]]"; - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - return " [[ center_no_perspective ]]"; + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + return " [[ center_no_perspective ]]"; - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - return " [[ centroid_no_perspective ]]"; + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + return " [[ centroid_no_perspective ]]"; - case INTERPOLATION_LINEAR_SAMPLE: - return " [[ sample_perspective ]]"; + case INTERPOLATION_LINEAR_SAMPLE: + return " [[ sample_perspective ]]"; - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - return " [[ sample_no_perspective ]]"; - default: - ASSERT(0); - return ""; - } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + return " [[ sample_no_perspective ]]"; + default: + ASSERT(0); + return ""; + } } - -void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB, uint32_t cumulativeOffset) +void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB, uint32_t cumulativeOffset, bool isUsed) { - DeclareStructVariable(parentName, var.sType, withinCB, cumulativeOffset + var.ui32StartOffset); + DeclareStructVariable(parentName, var.sType, withinCB, cumulativeOffset + var.ui32StartOffset, isUsed); } -void 
ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB, uint32_t cumulativeOffset) +void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB, uint32_t cumulativeOffset, bool isUsed) { - // CB arrays need to be defined as 4 component vectors to match DX11 data layout - bool arrayWithinCB = (withinCB && (var.Elements > 1) && (psContext->psShader->eShaderType == COMPUTE_SHADER)); - bool doDeclare = true; - - if (var.Class == SVC_STRUCT) - { - if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) - DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); - - // Report Array-of-Struct CB top-level struct var after all members are reported. - if (var.Parent == NULL && var.Elements > 1 && withinCB) - { - // var.Type being SVT_VOID indicates it is a struct in this case. - psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements); - } - - std::ostringstream oss; - oss << var.name << "_Type " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - m_StructDefinitions[parentName].m_Dependencies.push_back(var.name + "_Type"); - return; - } - - else if (var.Class == SVC_MATRIX_COLUMNS || var.Class == SVC_MATRIX_ROWS) - { - std::ostringstream oss; - if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - { - // Translate matrices into vec4 arrays - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, var.Rows, var.Columns); - oss << HLSLcc::GetConstructorForType(psContext, var.Type, 4) << " " << prefix << var.name; - - uint32_t elemCount = (var.Class == SVC_MATRIX_COLUMNS ? 
var.Columns : var.Rows); - if (var.Elements > 1) - { - elemCount *= var.Elements; - } - oss << "[" << elemCount << "]"; - - if(withinCB) - { - // On compute shaders we need to reflect the vec array as it is to support all possible matrix sizes correctly. - // On non-compute we can fake that we still have a matrix, as CB upload code will fill the data correctly on 4x4 matrices. - // That way we avoid the issues with mismatching types for builtins etc. - if (psContext->psShader->eShaderType == COMPUTE_SHADER) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 4, false, elemCount); - else - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements); - } - } - else - { - oss << HLSLcc::GetMatrixTypeName(psContext, var.Type, var.Columns, var.Rows); - oss << " " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - - // TODO Verify whether the offset is from the beginning of the CB or from the beginning of the struct - if(withinCB) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements); - } - - if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - } - else - if (var.Class == SVC_VECTOR && var.Columns > 1) - { - std::ostringstream oss; - oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 
4 : var.Columns); - oss << " " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - - if (withinCB) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, var.Columns, false, var.Elements); - - if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - } - else - if ((var.Class == SVC_SCALAR) || - (var.Class == SVC_VECTOR && var.Columns == 1)) - { - if (var.Type == SVT_BOOL) - { - //Use int instead of bool. - //Allows implicit conversions to integer and - //bool consumes 4-bytes in HLSL and GLSL anyway. - ((ShaderVarType &)var).Type = SVT_INT; - } - - std::ostringstream oss; - oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 4 : 1); - oss << " " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - - if (withinCB) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 1, false, var.Elements); - - if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - } - else - { - ASSERT(0); - } + // CB arrays need to be defined as 4 component vectors to match DX11 data layout + bool arrayWithinCB = (withinCB && (var.Elements > 1) && (psContext->psShader->eShaderType == COMPUTE_SHADER)); + bool doDeclare = true; + + if (isUsed == false && ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS)) == 0) + isUsed = true; + + if (var.Class == SVC_STRUCT) + { + if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) + DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); + + // Report Array-of-Struct CB top-level struct var after all members are reported. + if (var.Parent == NULL && var.Elements > 1 && withinCB) + { + // var.Type being SVT_VOID indicates it is a struct in this case. 
+ psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements, true); + } + + std::ostringstream oss; + oss << var.name << "_Type " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + m_StructDefinitions[parentName].m_Dependencies.push_back(var.name + "_Type"); + return; + } + else if (var.Class == SVC_MATRIX_COLUMNS || var.Class == SVC_MATRIX_ROWS) + { + std::ostringstream oss; + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, var.Rows, var.Columns); + oss << HLSLcc::GetConstructorForType(psContext, var.Type, 4) << " " << prefix << var.name; + + uint32_t elemCount = (var.Class == SVC_MATRIX_COLUMNS ? var.Columns : var.Rows); + if (var.Elements > 1) + { + elemCount *= var.Elements; + } + oss << "[" << elemCount << "]"; + + if (withinCB) + { + // On compute shaders we need to reflect the vec array as it is to support all possible matrix sizes correctly. + // On non-compute we can fake that we still have a matrix, as CB upload code will fill the data correctly on 4x4 matrices. + // That way we avoid the issues with mismatching types for builtins etc. 
+ if (psContext->psShader->eShaderType == COMPUTE_SHADER) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 4, false, elemCount, isUsed); + else + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements, isUsed); + } + } + else + { + oss << HLSLcc::GetMatrixTypeName(psContext, var.Type, var.Columns, var.Rows); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + // TODO Verify whether the offset is from the beginning of the CB or from the beginning of the struct + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements, isUsed); + } + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + } + else if (var.Class == SVC_VECTOR && var.Columns > 1) + { + std::ostringstream oss; + oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 4 : var.Columns); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, var.Columns, false, var.Elements, isUsed); + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + } + else if ((var.Class == SVC_SCALAR) || + (var.Class == SVC_VECTOR && var.Columns == 1)) + { + if (var.Type == SVT_BOOL) + { + //Use int instead of bool. + //Allows implicit conversions to integer and + //bool consumes 4-bytes in HLSL and GLSL anyway. + ((ShaderVarType &)var).Type = SVT_INT; + } + + std::ostringstream oss; + oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 
4 : 1); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 1, false, var.Elements, isUsed); + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + } + else + { + ASSERT(0); + } } void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset, bool stripUnused /* = false */) { - for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) - { - if(stripUnused && !itr->sType.m_IsUsed) - continue; - - DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); - } + for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) + { + if (stripUnused && !itr->sType.m_IsUsed) + continue; + + DeclareStructVariable(name, *itr, withinCB, cumulativeOffset, itr->sType.m_IsUsed); + } } void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset) { - for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) - { - DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); - } + for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) + { + DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); + } } void ToMetal::DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint) { - const bool isGlobals = (psCBuf->name == "$Globals"); - const bool stripUnused = isGlobals && (psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS); - std::string cbname = GetCBName(psCBuf->name); + const bool isGlobals = (psCBuf->name == "$Globals"); + const bool stripUnused = isGlobals && (psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS); + std::string cbname = GetCBName(psCBuf->name); - // Note: if we're 
stripping unused members, both ui32TotalSizeInBytes and individual offsets into reflection will be completely off. - // However, the reflection layer re-calculates both to match Metal alignment rules anyway, so we're good. - if (!psContext->m_Reflection.OnConstantBuffer(cbname, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(stripUnused))) - return; + // Note: if we're stripping unused members, both ui32TotalSizeInBytes and individual offsets into reflection will be completely off. + // However, the reflection layer re-calculates both to match Metal alignment rules anyway, so we're good. + if (!psContext->m_Reflection.OnConstantBuffer(cbname, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(stripUnused))) + return; - if (psContext->psDependencies->IsMemberDeclared(cbname)) - return; + if (psContext->psDependencies->IsMemberDeclared(cbname)) + return; - DeclareStructType(cbname + "_Type", psCBuf->asVars, true, 0, stripUnused); + DeclareStructType(cbname + "_Type", psCBuf->asVars, true, 0, stripUnused); - std::ostringstream oss; - uint32_t slot = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::ConstantBuffer); + std::ostringstream oss; + uint32_t slot = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::ConstantBuffer); if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) oss << "const constant " << psCBuf->asVars[0].name << "_Type* "; @@ -1071,1119 +1064,1105 @@ void ToMetal::DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32B oss << "constant " << cbname << "_Type& "; oss << cbname << " [[ buffer(" << slot << ") ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(cbname, oss.str())); - m_StructDefinitions[""].m_Dependencies.push_back(cbname + "_Type"); - psContext->m_Reflection.OnConstantBufferBinding(cbname, slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(cbname, oss.str())); + m_StructDefinitions[""].m_Dependencies.push_back(cbname + "_Type"); + 
psContext->m_Reflection.OnConstantBufferBinding(cbname, slot); } void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV) { - uint32_t regNo = psDecl->asOperands[0].ui32RegisterNumber; - std::string BufName, BufType, BufConst; - - BufName = ""; - BufType = ""; - BufConst = ""; - - BufName = ResourceName(isUAV ? RGROUP_UAV : RGROUP_TEXTURE, regNo); - - if (!isRaw) // declare struct containing uint array when needed - { - std::ostringstream typeoss; - BufType = BufName + "_Type"; - typeoss << "uint value["; - typeoss << psDecl->ui32BufferStride / 4 << "]"; - m_StructDefinitions[BufType].m_Members.push_back(std::make_pair("value", typeoss.str())); - m_StructDefinitions[""].m_Dependencies.push_back(BufType); - } - - if (!psContext->psDependencies->IsMemberDeclared(BufName)) - { - std::ostringstream oss; - - if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) - { - BufConst = "const "; - oss << BufConst; - } - else - { - if (psContext->psShader->eShaderType != COMPUTE_SHADER) - psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false); - } - - if (isRaw) - oss << "device uint *" << BufName; - else - oss << "device " << BufType << " *" << BufName; - - uint32_t loc = m_BufferSlots.GetBindingSlot(regNo, isUAV ? 
BindingSlotAllocator::RWBuffer : BindingSlotAllocator::Texture); - oss << " [[ buffer(" << loc << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, oss.str())); - psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV); - } - - // In addition to the actual declaration, we need pointer modification and possible counter declaration - // in early main: - std::ostringstream earlymainoss; - - // Possible counter is always in the beginning of the buffer - if (isUAV && psDecl->sUAV.bCounter) - { - earlymainoss << " device atomic_uint *" << BufName << "_counter = reinterpret_cast (" << BufName << ");\n"; - } - - // Some GPUs don't allow memory access below buffer binding offset in the shader so always bind compute buffer - // at offset 0 instead of GetDataOffset(). - // We can't tell at shader compile time if the buffer actually has counter or not. Therefore we'll always reserve - // space for the counter and bump the data pointer to beginning of the actual data here. - earlymainoss << " " << BufName << " = reinterpret_cast<" << BufConst - << "device " << (isRaw ? "uint" : BufType) << " *> (reinterpret_cast (" << BufName << ") + 1);\n"; - - bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, earlymainoss.str().c_str()); -} + uint32_t regNo = psDecl->asOperands[0].ui32RegisterNumber; + std::string BufName, BufType, BufConst; + + BufName = ""; + BufType = ""; + BufConst = ""; + + BufName = ResourceName(isUAV ? 
RGROUP_UAV : RGROUP_TEXTURE, regNo); + + if (!isRaw) // declare struct containing uint array when needed + { + std::ostringstream typeoss; + BufType = BufName + "_Type"; + typeoss << "uint value["; + typeoss << psDecl->ui32BufferStride / 4 << "]"; + m_StructDefinitions[BufType].m_Members.push_back(std::make_pair("value", typeoss.str())); + m_StructDefinitions[""].m_Dependencies.push_back(BufType); + } + + if (!psContext->psDependencies->IsMemberDeclared(BufName)) + { + std::ostringstream oss; + + if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) + { + BufConst = "const "; + oss << BufConst; + } + else + { + if (psContext->psShader->eShaderType != COMPUTE_SHADER) + psContext->m_Reflection.OnDiagnostics("This shader might not work on all Metal devices because of buffer writes on non-compute shaders.", 0, false); + } + + if (isRaw) + oss << "device uint *" << BufName; + else + oss << "device " << BufType << " *" << BufName; + + uint32_t loc = m_BufferSlots.GetBindingSlot(regNo, isUAV ? BindingSlotAllocator::RWBuffer : BindingSlotAllocator::Texture); + oss << " [[ buffer(" << loc << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, oss.str())); + psContext->m_Reflection.OnBufferBinding(BufName, loc, isUAV); + } + // In addition to the actual declaration, we need pointer modification and possible counter declaration + // in early main: + + // Possible counter is always in the beginning of the buffer + if (isUAV && psDecl->sUAV.bCounter) + { + bformata(GetEarlyMain(psContext), "device atomic_uint *%s_counter = reinterpret_cast (%s);\n", BufName.c_str(), BufName.c_str()); + } + + // Some GPUs don't allow memory access below buffer binding offset in the shader so always bind compute buffer + // at offset 0 instead of GetDataOffset(). + // We can't tell at shader compile time if the buffer actually has counter or not. 
Therefore we'll always reserve + // space for the counter and bump the data pointer to beginning of the actual data here. + bformata(GetEarlyMain(psContext), "%s = reinterpret_cast<%sdevice %s *> (reinterpret_cast (%s) + 1);\n", BufName.c_str(), BufConst.c_str(), (isRaw ? "uint" : BufType.c_str()), BufConst.c_str(), BufName.c_str()); +} static int ParseInlineSamplerWrapMode(const std::string& samplerName, const std::string& wrapName) { - int res = 0; - const bool hasWrap = (samplerName.find(wrapName) != std::string::npos); - if (!hasWrap) - return res; - - const bool hasU = (samplerName.find(wrapName + 'u') != std::string::npos); - const bool hasV = (samplerName.find(wrapName + 'v') != std::string::npos); - const bool hasW = (samplerName.find(wrapName + 'w') != std::string::npos); - - if (hasWrap) res |= 1; - if (hasU) res |= 2; - if (hasV) res |= 4; - if (hasW) res |= 8; - return res; + int res = 0; + const bool hasWrap = (samplerName.find(wrapName) != std::string::npos); + if (!hasWrap) + return res; + + const bool hasU = (samplerName.find(wrapName + 'u') != std::string::npos); + const bool hasV = (samplerName.find(wrapName + 'v') != std::string::npos); + const bool hasW = (samplerName.find(wrapName + 'w') != std::string::npos); + + if (hasWrap) res |= 1; + if (hasU) res |= 2; + if (hasV) res |= 4; + if (hasW) res |= 8; + return res; } - -static bool EmitInlineSampler(HLSLCrossCompilerContext* ctx, const std::string& name) +static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::string& name) { - // See if it's a sampler that goes with the texture, or an "inline" sampler - // where sampler states are hardcoded in the shader directly. - // - // The logic for "inline" samplers below must match what is recognized - // by other shader platforms in Unity (ParseInlineSamplerName function - // in the shader compiler). 
- - std::string samplerName(name); std::transform(samplerName.begin(), samplerName.end(), samplerName.begin(), ::tolower); - - // filter modes - const bool hasPoint = (samplerName.find("point") != std::string::npos); - const bool hasTrilinear = (samplerName.find("trilinear") != std::string::npos); - const bool hasLinear = (samplerName.find("linear") != std::string::npos); - const bool hasAnyFilter = hasPoint || hasTrilinear || hasLinear; - - // wrap modes - const int bitsClamp = ParseInlineSamplerWrapMode(samplerName, "clamp"); - const int bitsRepeat = ParseInlineSamplerWrapMode(samplerName, "repeat"); - const int bitsMirror = ParseInlineSamplerWrapMode(samplerName, "mirror"); - const int bitsMirrorOnce = ParseInlineSamplerWrapMode(samplerName, "mirroronce"); - - const bool hasAnyWrap = bitsClamp != 0 || bitsRepeat != 0 || bitsMirror != 0 || bitsMirrorOnce != 0; - - // depth comparison - const bool hasCompare = (samplerName.find("compare") != std::string::npos); - - // name must contain a filter mode and a wrap mode at least - if (!hasAnyFilter || !hasAnyWrap) - { - return false; - } - - bstring str = ctx->psShader->asPhases[ctx->currentPhase].earlyMain; - bformata(str, "\tconstexpr sampler %s(", name.c_str()); - - if (hasCompare) - bformata(str, "compare_func::greater_equal,"); - - if (hasTrilinear) - bformata(str, "filter::linear,mip_filter::linear,"); - else if (hasLinear) - bformata(str, "filter::linear,"); - else - bformata(str, "filter::nearest,"); - - const char* kTexWrapClamp = "clamp_to_edge"; - const char* kTexWrapRepeat = "repeat"; - const char* kTexWrapMirror = "mirrored_repeat"; - const char* kTexWrapMirrorOnce = "mirrored_repeat"; // currently Metal shading language does not have syntax for inline sampler state that would do "mirror clamp to edge" - const char* wrapU = kTexWrapRepeat; - const char* wrapV = kTexWrapRepeat; - const char* wrapW = kTexWrapRepeat; - - if (bitsClamp == 1) wrapU = wrapV = wrapW = kTexWrapClamp; - else if (bitsRepeat == 1) 
wrapU = wrapV = wrapW = kTexWrapRepeat; - else if (bitsMirrorOnce == 1) wrapU = wrapV = wrapW = kTexWrapMirrorOnce; - else if (bitsMirror == 1) wrapU = wrapV = wrapW = kTexWrapMirror; - - if ((bitsClamp & 2) != 0) wrapU = kTexWrapClamp; - if ((bitsClamp & 4) != 0) wrapV = kTexWrapClamp; - if ((bitsClamp & 8) != 0) wrapW = kTexWrapClamp; - - if ((bitsRepeat & 2) != 0) wrapU = kTexWrapRepeat; - if ((bitsRepeat & 4) != 0) wrapV = kTexWrapRepeat; - if ((bitsRepeat & 8) != 0) wrapW = kTexWrapRepeat; - - if ((bitsMirrorOnce & 2) != 0) wrapU = kTexWrapMirrorOnce; - if ((bitsMirrorOnce & 4) != 0) wrapV = kTexWrapMirrorOnce; - if ((bitsMirrorOnce & 8) != 0) wrapW = kTexWrapMirrorOnce; - - if ((bitsMirror & 2) != 0) wrapU = kTexWrapMirror; - if ((bitsMirror & 4) != 0) wrapV = kTexWrapMirror; - if ((bitsMirror & 8) != 0) wrapW = kTexWrapMirror; - - if (wrapU == wrapV && wrapU == wrapW) - bformata(str, "address::%s", wrapU); - else - bformata(str, "s_address::%s,t_address::%s,r_address::%s", wrapU, wrapV, wrapW); - - bformata(str, ");\n"); - - return true; -} + // See if it's a sampler that goes with the texture, or an "inline" sampler + // where sampler states are hardcoded in the shader directly. + // + // The logic for "inline" samplers below must match what is recognized + // by other shader platforms in Unity (ParseInlineSamplerName function + // in the shader compiler). 
+ + std::string samplerName(name); std::transform(samplerName.begin(), samplerName.end(), samplerName.begin(), ::tolower); + + // filter modes + const bool hasPoint = (samplerName.find("point") != std::string::npos); + const bool hasTrilinear = (samplerName.find("trilinear") != std::string::npos); + const bool hasLinear = (samplerName.find("linear") != std::string::npos); + const bool hasAnyFilter = hasPoint || hasTrilinear || hasLinear; + + // wrap modes + const int bitsClamp = ParseInlineSamplerWrapMode(samplerName, "clamp"); + const int bitsRepeat = ParseInlineSamplerWrapMode(samplerName, "repeat"); + const int bitsMirror = ParseInlineSamplerWrapMode(samplerName, "mirror"); + const int bitsMirrorOnce = ParseInlineSamplerWrapMode(samplerName, "mirroronce"); + + const bool hasAnyWrap = bitsClamp != 0 || bitsRepeat != 0 || bitsMirror != 0 || bitsMirrorOnce != 0; + + // depth comparison + const bool hasCompare = (samplerName.find("compare") != std::string::npos); + + // name must contain a filter mode and a wrap mode at least + if (!hasAnyFilter || !hasAnyWrap) + { + return false; + } + + bstring str = GetEarlyMain(psContext); + bformata(str, "constexpr sampler %s(", name.c_str()); + + if (hasCompare) + bformata(str, "compare_func::greater_equal,"); + + if (hasTrilinear) + bformata(str, "filter::linear,mip_filter::linear,"); + else if (hasLinear) + bformata(str, "filter::linear,"); + else + bformata(str, "filter::nearest,"); + + const char* kTexWrapClamp = "clamp_to_edge"; + const char* kTexWrapRepeat = "repeat"; + const char* kTexWrapMirror = "mirrored_repeat"; + const char* kTexWrapMirrorOnce = "mirrored_repeat"; // currently Metal shading language does not have syntax for inline sampler state that would do "mirror clamp to edge" + const char* wrapU = kTexWrapRepeat; + const char* wrapV = kTexWrapRepeat; + const char* wrapW = kTexWrapRepeat; + + if (bitsClamp == 1) wrapU = wrapV = wrapW = kTexWrapClamp; + else if (bitsRepeat == 1) wrapU = wrapV = wrapW = 
kTexWrapRepeat; + else if (bitsMirrorOnce == 1) wrapU = wrapV = wrapW = kTexWrapMirrorOnce; + else if (bitsMirror == 1) wrapU = wrapV = wrapW = kTexWrapMirror; + + if ((bitsClamp & 2) != 0) wrapU = kTexWrapClamp; + if ((bitsClamp & 4) != 0) wrapV = kTexWrapClamp; + if ((bitsClamp & 8) != 0) wrapW = kTexWrapClamp; + + if ((bitsRepeat & 2) != 0) wrapU = kTexWrapRepeat; + if ((bitsRepeat & 4) != 0) wrapV = kTexWrapRepeat; + if ((bitsRepeat & 8) != 0) wrapW = kTexWrapRepeat; + + if ((bitsMirrorOnce & 2) != 0) wrapU = kTexWrapMirrorOnce; + if ((bitsMirrorOnce & 4) != 0) wrapV = kTexWrapMirrorOnce; + if ((bitsMirrorOnce & 8) != 0) wrapW = kTexWrapMirrorOnce; + + if ((bitsMirror & 2) != 0) wrapU = kTexWrapMirror; + if ((bitsMirror & 4) != 0) wrapV = kTexWrapMirror; + if ((bitsMirror & 8) != 0) wrapW = kTexWrapMirror; + + if (wrapU == wrapV && wrapU == wrapW) + bformata(str, "address::%s", wrapU); + else + bformata(str, "s_address::%s,t_address::%s,r_address::%s", wrapU, wrapV, wrapW); + bformata(str, ");\n"); + + return true; +} void ToMetal::TranslateDeclaration(const Declaration* psDecl) { - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - switch (psDecl->eOpcode) - { - - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_PS_SGV: - DeclareBuiltinInput(psDecl); - break; - case OPCODE_DCL_OUTPUT_SIV: - DeclareBuiltinOutput(psDecl); - break; - case OPCODE_DCL_INPUT: - case OPCODE_DCL_INPUT_PS_SIV: - case OPCODE_DCL_INPUT_SIV: - case OPCODE_DCL_INPUT_PS: - { - const Operand* psOperand = &psDecl->asOperands[0]; - - if((psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID)|| - (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) - { - break; - } - - // No need to declare patch constants read again by the hull shader. 
- if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - // ...or control points - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - - //Already declared as part of an array. - if(psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) - { - break; - } - - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - uint32_t ui32CompMask = psDecl->asOperands[0].ui32CompMask; - - std::string name = psContext->GetDeclaredInputName(psOperand, nullptr, 1, nullptr); - - // NB: unlike GL we keep arrays of 2-component vectors as is (without collapsing into float4) - // if(psShader->aIndexedInput[0][psDecl->asOperands[0].ui32RegisterNumber] == -1) - // break; - - // Already declared? - if ((ui32CompMask != 0) && ((ui32CompMask & ~psShader->acInputDeclared[0][ui32Reg]) == 0)) - { - ASSERT(0); // Catch this - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) - { - std::ostringstream oss; - oss << "uint " << name << " [[ sample_mask ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name,oss.str())); - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) - { - std::ostringstream oss; - oss << "uint3 " << name << " [[ thread_position_in_grid ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) - { - std::ostringstream oss; - oss << "uint3 " << name << " [[ threadgroup_position_in_grid ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) - { - std::ostringstream oss; - oss << "uint3 " << name << " [[ thread_position_in_threadgroup ]]"; - 
m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX) - { - std::ostringstream oss; - oss << "uint " << name << " [[ render_target_array_index ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) - { - std::ostringstream oss; - std::string patchPositionType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "float2 " : "float3 "; - oss << patchPositionType << name << " [[ position_in_patch ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) - { - std::ostringstream oss; - oss << "uint " << name << " [[ thread_index_in_threadgroup ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eSpecialName == NAME_VIEWPORT_ARRAY_INDEX) - { - std::ostringstream oss; - oss << "uint " << name << " [[ viewport_array_index ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if(psDecl->eOpcode == OPCODE_DCL_INPUT_PS_SIV && psOperand->eSpecialName == NAME_POSITION) - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); - break; - } - - if (psContext->psDependencies) - { - if (psShader->eShaderType == PIXEL_SHADER) - { - psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); - } - } - - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - - const ShaderInfo::InOutSignature *psSig = NULL; - - // This falls within the specified index ranges. 
The default is 0 if no input range is specified - if (regSpace == 0) - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - - if (!psSig) - break; - - // fragment shader cannot reference builtins generated by vertex program (with obvious exception of position) - // TODO: some visible error? handle more builtins? - if (psContext->psShader->eShaderType == PIXEL_SHADER && !strncmp(psSig->semanticName.c_str(), "PSIZE", 5)) - break; - - int iNumComponents = psOperand->GetNumInputElements(psContext); - psShader->acInputDeclared[0][ui32Reg] = (char)psSig->ui32Mask; - - std::string typeName = BuildOperandTypeString(psOperand->eMinPrecision, psSig->eComponentType, iNumComponents); - - std::string semantic; - if (psContext->psShader->eShaderType == VERTEX_SHADER || psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) - { - std::ostringstream oss; - // VERTEX_SHADER hardcoded on purpose - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); - oss << "attribute(" << loc << ")"; - semantic = oss.str(); - psContext->m_Reflection.OnInputBinding(name, loc); - } - else - { - std::ostringstream oss; - - // UNITY_FRAMEBUFFER_FETCH_AVAILABLE - // special case mapping for inout color, see HLSLSupport.cginc - if (psOperand->iPSInOut && name.size() == 10 && !strncmp(name.c_str(), "SV_Target", 9)) - { - // Metal allows color(X) declared in input/output structs - oss << "color(xlt_remap_i[" << psSig->ui32SemanticIndex << "])"; - m_NeedFBInputRemapDecl = true; - } - else - { - oss << "user(" << name << ")"; - } - semantic = oss.str(); - } - - std::string interpolation = ""; - if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS) - { - interpolation = GetInterpolationString(psDecl->value.eInterpolation); - } - - std::string declString; - if 
((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D && psOperand->eType != OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType != HULL_SHADER) - { - std::ostringstream oss; - oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] "; - - if (psContext->psShader->eShaderType != HULL_SHADER) - oss << " [[ " << semantic << " ]] " << interpolation; - declString = oss.str(); - } - else - { - std::ostringstream oss; - oss << typeName << " " << name; - if (psContext->psShader->eShaderType != HULL_SHADER) - oss << " [[ " << semantic << " ]] " << interpolation; - declString = oss.str(); - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) - { - m_StructDefinitions["Mtl_PatchConstant"].m_Members.push_back(std::make_pair(name, declString)); - } - else if (psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType == DOMAIN_SHADER) - { - m_StructDefinitions["Mtl_ControlPoint"].m_Members.push_back(std::make_pair(name, declString)); - } - else if (psContext->psShader->eShaderType == HULL_SHADER) - { - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); - } - else - { - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); - } - - HandleInputRedirect(psDecl, BuildOperandTypeString(psOperand->eMinPrecision, INOUT_COMPONENT_FLOAT32, 4)); - break; - } - case OPCODE_DCL_TEMPS: - { - uint32_t i = 0; - const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; - glsl = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - for (i = 0; i < ui32NumTemps; i++) - { - if (psShader->psFloatTempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i]), i); - if (psShader->psFloat16TempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "16_%d;\n", 
HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i]), i); - if (psShader->psFloat10TempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i]), i); - if (psShader->psIntTempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i]), i); - if (psShader->psInt16TempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i]), i); - if (psShader->psInt12TempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i]), i); - if (psShader->psUIntTempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i]), i); - if (psShader->psUInt16TempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i]), i); - if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i]), i); - if (psShader->psBoolTempSizes[i] != 0) - bformata(glsl, " %s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i]), i); - } - break; - } - case OPCODE_SPECIAL_DCL_IMMCONST: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPCODE_DCL_CONSTANT_BUFFER: - { - const ConstantBuffer* psCBuf = NULL; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psDecl->asOperands[0].aui32ArraySizes[0], &psCBuf); - ASSERT(psCBuf != NULL); - - if (psCBuf->name.substr(0, 20) == 
"hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) - { - // Special case for framebuffer fetch. - char ty = psCBuf->name[20]; - int idx = psCBuf->name[22] - '0'; - - const ShaderVar &sv = psCBuf->asVars[0]; - if (sv.name.substr(0, 15) == "hlslcc_fbinput_") - { - // Pick up the type and index - std::ostringstream oss; - m_NeedFBInputRemapDecl = true; - switch (ty) - { - case 'f': - case 'F': - oss << "float4 " << sv.name << " [[ color(xlt_remap_i["<< idx <<"]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - case 'h': - case 'H': - oss << "half4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - case 'i': - case 'I': - oss << "int4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - case 'u': - case 'U': - oss << "uint4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - default: - break; - } - } - // Break out so this doesn't get declared. 
- break; - } - - DeclareConstantBuffer(psCBuf, psDecl->asOperands[0].aui32ArraySizes[0]); - break; - } - case OPCODE_DCL_RESOURCE: - { - DeclareResource(psDecl); - break; - } - case OPCODE_DCL_OUTPUT: - { - DeclareOutput(psDecl); - break; - } - - case OPCODE_DCL_GLOBAL_FLAGS: - { - uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; - - if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) - { - psShader->sInfo.bEarlyFragmentTests = true; - } - if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) - { - //TODO add precise - //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx - } - if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) - { - // Not supported on Metal -// psShader->fp64 = 1; - } - break; - } - case OPCODE_DCL_THREAD_GROUP: - { - // Send this info to reflecion: Metal gives this at runtime as a param - psContext->m_Reflection.OnThreadGroupSize(psDecl->value.aui32WorkGroupSize[0], - psDecl->value.aui32WorkGroupSize[1], - psDecl->value.aui32WorkGroupSize[2]); - break; - } - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - { - if(psContext->psShader->eShaderType == HULL_SHADER) - { - psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; - if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; - else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; - } - break; - } - case OPCODE_DCL_TESS_DOMAIN: - { - psContext->psShader->sInfo.eTessDomain = psDecl->value.eTessDomain; - - if (psContext->psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_ISOLINE) - psContext->m_Reflection.OnDiagnostics("Metal Tessellation: domain(\"isoline\") not supported.", 0, true); - break; - } - case OPCODE_DCL_TESS_PARTITIONING: - { - 
psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; - break; - } - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - { - // Not supported - break; - } - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - { - // Not supported - break; - } - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - { - // Not supported - break; - } - case OPCODE_DCL_INTERFACE: - { - // Are interfaces ever even used? - ASSERT(0); - break; - } - case OPCODE_DCL_FUNCTION_BODY: - { - ASSERT(0); - break; - } - case OPCODE_DCL_FUNCTION_TABLE: - { - ASSERT(0); - break; - } - case OPCODE_CUSTOMDATA: - { - // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. - // Walk through all the chunks we've seen in this phase. - - bstring glsl = *psContext->currentGLSLString; - bformata(glsl, "constant float4 ImmCB_%d[%d] =\n{\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); - bool isFirst = true; - std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) - { - if (!isFirst) - { - bcatcstr(glsl, ",\n"); - } - isFirst = false; - - float val[4] = { - *(float*)&data.a, - *(float*)&data.b, - *(float*)&data.c, - *(float*)&data.d - }; - - bformata(glsl, "\tfloat4("); - for (uint32_t k = 0; k < 4; k++) - { - if (k != 0) - bcatcstr(glsl, ", "); - if (fpcheck(val[k])) - bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k]); - else - HLSLcc::PrintFloat(glsl, val[k]); - } - bcatcstr(glsl, ")"); - }); - bcatcstr(glsl, "\n};\n"); - break; - } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - break; // Nothing to do - - case OPCODE_DCL_INDEXABLE_TEMP: - { - const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; - const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; - const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; - bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, 
"float%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); - break; - } - case OPCODE_DCL_INDEX_RANGE: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_INPUT: - { - const ShaderInfo::InOutSignature* psSignature = NULL; - const char* type = "float"; - uint32_t startReg = 0; - uint32_t i; - bstring *oldString; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; - - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - type = "uint"; - break; - } - case INOUT_COMPONENT_SINT32: - { - type = "int"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } - default: - ASSERT(0); - break; - } - - switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? - { - default: - break; - case MIN_PRECISION_ANY_16: - ASSERT(0); // Wut? 
- break; - case MIN_PRECISION_FLOAT_16: - case MIN_PRECISION_FLOAT_2_8: - type = "half"; - break; - case MIN_PRECISION_SINT_16: - type = "short"; - break; - case MIN_PRECISION_UINT_16: - type = "ushort"; - break; - } - - startReg = psDecl->asOperands[0].ui32RegisterNumber; - oldString = psContext->currentGLSLString; - psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - psContext->AddIndentation(); - psContext->currentGLSLString = oldString; - bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "%s4 phase%d_%sput%d_%d[%d];\n", type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); - oldString = psContext->currentGLSLString; - glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - psContext->currentGLSLString = &glsl; - if (isInput == 0) - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - int dummy = 0; - std::string realName; - uint32_t destMask = psDecl->asOperands[0].ui32CompMask; - uint32_t rebase = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - - if (regSpace == 0) - if (isInput) - psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); - else - psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); - - ASSERT(psSig != NULL); - - if ((psSig->ui32Mask & destMask) == 0) - continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) - - while ((psSig->ui32Mask & (1 << rebase)) == 0) - rebase++; - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; - - if 
(isInput) - { - realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); - - psContext->AddIndentation(); - - bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - - // for some reason input struct is missed here from GetDeclaredInputName result, so add it manually - bformata(glsl, " = input.%s", realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - } - else - { - realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 0); - - psContext->AddIndentation(); - bcatcstr(glsl, realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - - bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - } - - bcatcstr(glsl, ";\n"); - } - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; - psContext->currentGLSLString = oldString; - glsl = *psContext->currentGLSLString; - - for (i = 0; i < 
psDecl->value.ui32IndexRange; i++) - { - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); - ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; - ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; - } - - - break; - } - default: - // TODO Input index ranges. - ASSERT(0); - } - break; - } - - case OPCODE_HS_DECLS: - { - // Not supported - break; - } - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - { - if(psContext->psShader->eShaderType == HULL_SHADER) - psShader->sInfo.ui32TessInputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; - else if(psContext->psShader->eShaderType == DOMAIN_SHADER) - psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; - break; - } - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - { - if(psContext->psShader->eShaderType == HULL_SHADER) - psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; - break; - } - case OPCODE_HS_FORK_PHASE: - { - // Not supported - break; - } - case OPCODE_HS_JOIN_PHASE: - { - // Not supported - break; - } - case OPCODE_DCL_SAMPLER: - { - std::string name = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); - - if (!EmitInlineSampler(psContext, name)) - { - // for some reason we have some samplers start with "sampler" and some not - 
const bool startsWithSampler = name.find("sampler") == 0; - - std::ostringstream samplerOss; - samplerOss << (startsWithSampler ? "" : "sampler") << name; - std::string samplerName = samplerOss.str(); - - if (!psContext->psDependencies->IsMemberDeclared(samplerName)) - { - const uint32_t slot = m_SamplerSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::ostringstream oss; - oss << "sampler " << samplerName << " [[ sampler (" << slot << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(samplerName, oss.str())); - - SamplerDesc desc = { name, psDecl->asOperands[0].ui32RegisterNumber, slot }; - m_Samplers.push_back(desc); - } - } - - break; - } - case OPCODE_DCL_HS_MAX_TESSFACTOR: - { - if(psContext->psShader->eShaderType == HULL_SHADER && psContext->psDependencies) - psContext->psDependencies->fMaxTessFactor = psDecl->value.fMaxTessFactor; - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - { - // A hack to support single component 32bit RWBuffers: Declare as raw buffer. - // TODO: Use textures for RWBuffers when the scripting API has actual format selection etc - // way to flag the created ComputeBuffer as typed. Even then might want to leave this - // hack path for 32bit (u)int typed buffers to continue support atomic ops on those formats. 
- if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) - { - DeclareBufferVariable(psDecl, true, true); - break; - } - std::string texName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); - std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, false, true); - if (!psContext->psDependencies->IsMemberDeclared(texName)) - { - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); - - std::ostringstream oss; - oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); - - HLSLCC_TEX_DIMENSION texDim = TD_INT; - switch (psDecl->value.eResourceDimension) - { - default: break; - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - texDim = TD_2D; - break; - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - texDim = TD_2DARRAY; - break; - case RESOURCE_DIMENSION_TEXTURE3D: - texDim = TD_3D; - break; - case RESOURCE_DIMENSION_TEXTURECUBE: - texDim = TD_CUBE; - break; - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - texDim = TD_CUBEARRAY; - break; - } - TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, false, false, true}; - m_Textures.push_back(desc); - } - break; - } - - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - { - DeclareBufferVariable(psDecl, false, true); - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - { - DeclareBufferVariable(psDecl, true, true); - break; - } - case OPCODE_DCL_RESOURCE_STRUCTURED: - { - DeclareBufferVariable(psDecl, false, false); - break; - } - case OPCODE_DCL_RESOURCE_RAW: - { - DeclareBufferVariable(psDecl, true, false); - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - std::ostringstream oss; - oss 
<< "uint value[" << psDecl->sTGSM.ui32Stride / 4 << "]"; - m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(std::make_pair("value", oss.str())); - m_StructDefinitions[""].m_Dependencies.push_back(TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"); - oss.str(""); - oss << "threadgroup " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) - << "_Type " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) - << "[" << psDecl->sTGSM.ui32Count << "]"; - - bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "\t%s;\n", oss.str().c_str()); - - psVarType->name = "$Element"; - - psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; - psVarType->Elements = psDecl->sTGSM.ui32Count; - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - - std::ostringstream oss; - oss << "threadgroup uint " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) - << "[" << (psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride) << "]"; - - bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "\t%s;\n", oss.str().c_str()); - - psVarType->name = "$Element"; - - psVarType->Columns = 1; - psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; - break; - } - - case OPCODE_DCL_STREAM: - { - // Not supported on Metal - break; - } - case OPCODE_DCL_GS_INSTANCE_COUNT: - { - // Not supported on Metal - break; - } - - default: - ASSERT(0); - break; - } + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + switch (psDecl->eOpcode) + { + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + DeclareBuiltinInput(psDecl); + break; + case OPCODE_DCL_OUTPUT_SIV: + DeclareBuiltinOutput(psDecl); + break; + case OPCODE_DCL_INPUT: + case OPCODE_DCL_INPUT_PS_SIV: + case 
OPCODE_DCL_INPUT_SIV: + case OPCODE_DCL_INPUT_PS: + { + const Operand* psOperand = &psDecl->asOperands[0]; + + if ((psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) + { + break; + } + + // No need to declare patch constants read again by the hull shader. + if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + // ...or control points + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + + //Already declared as part of an array. + if (psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + break; + } + + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + uint32_t ui32CompMask = psDecl->asOperands[0].ui32CompMask; + + std::string name = psContext->GetDeclaredInputName(psOperand, nullptr, 1, nullptr); + + // NB: unlike GL we keep arrays of 2-component vectors as is (without collapsing into float4) + // if(psShader->aIndexedInput[0][psDecl->asOperands[0].ui32RegisterNumber] == -1) + // break; + + // Already declared? 
+ if ((ui32CompMask != 0) && ((ui32CompMask & ~psShader->acInputDeclared[0][ui32Reg]) == 0)) + { + ASSERT(0); // Catch this + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) + { + std::ostringstream oss; + oss << "uint " << name << " [[ sample_mask ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ thread_position_in_grid ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ threadgroup_position_in_grid ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ thread_position_in_threadgroup ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX) + { + std::ostringstream oss; + oss << "uint " << name << " [[ render_target_array_index ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) + { + std::ostringstream oss; + std::string patchPositionType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"float2 " : "float3 "; + oss << patchPositionType << name << " [[ position_in_patch ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) + { + std::ostringstream oss; + oss << "uint " << name << " [[ thread_index_in_threadgroup ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eSpecialName == NAME_VIEWPORT_ARRAY_INDEX) + { + std::ostringstream oss; + oss << "uint " << name << " [[ viewport_array_index ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS_SIV && psOperand->eSpecialName == NAME_POSITION) + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); + bcatcstr(GetEarlyMain(psContext), "float4 hlslcc_FragCoord = float4(mtl_FragCoord.xyz, 1.0/mtl_FragCoord.w);\n"); + break; + } + + if (psContext->psDependencies) + { + if (psShader->eShaderType == PIXEL_SHADER) + { + psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); + } + } + + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + const ShaderInfo::InOutSignature *psSig = NULL; + + // This falls within the specified index ranges. The default is 0 if no input range is specified + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + if (!psSig) + break; + + // fragment shader cannot reference builtins generated by vertex program (with obvious exception of position) + // TODO: some visible error? handle more builtins? 
+ if (psContext->psShader->eShaderType == PIXEL_SHADER && !strncmp(psSig->semanticName.c_str(), "PSIZE", 5)) + break; + + int iNumComponents = psOperand->GetNumInputElements(psContext); + psShader->acInputDeclared[0][ui32Reg] = (char)psSig->ui32Mask; + + std::string typeName = BuildOperandTypeString(psOperand->eMinPrecision, psSig->eComponentType, iNumComponents); + + std::string semantic; + if (psContext->psShader->eShaderType == VERTEX_SHADER || psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + std::ostringstream oss; + // VERTEX_SHADER hardcoded on purpose + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + oss << "attribute(" << loc << ")"; + semantic = oss.str(); + psContext->m_Reflection.OnInputBinding(name, loc); + } + else + { + std::ostringstream oss; + + // UNITY_FRAMEBUFFER_FETCH_AVAILABLE + // special case mapping for inout color, see HLSLSupport.cginc + if (psOperand->iPSInOut && name.size() == 10 && !strncmp(name.c_str(), "SV_Target", 9)) + { + // Metal allows color(X) declared in input/output structs + oss << "color(xlt_remap_i[" << psSig->ui32SemanticIndex << "])"; + m_NeedFBInputRemapDecl = true; + } + else + { + oss << "user(" << name << ")"; + } + semantic = oss.str(); + } + + std::string interpolation = ""; + if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS) + { + interpolation = GetInterpolationString(psDecl->value.eInterpolation); + } + + std::string declString; + if ((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D && psOperand->eType != OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType != HULL_SHADER) + { + std::ostringstream oss; + oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] "; + + if (psContext->psShader->eShaderType != HULL_SHADER) + oss << " [[ " << semantic << " ]] " << interpolation; + declString = oss.str(); + } + else + { + std::ostringstream oss; + oss << typeName << " " << 
name; + if (psContext->psShader->eShaderType != HULL_SHADER) + oss << " [[ " << semantic << " ]] " << interpolation; + declString = oss.str(); + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + m_StructDefinitions["Mtl_PatchConstant"].m_Members.push_back(std::make_pair(name, declString)); + } + else if (psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + m_StructDefinitions["Mtl_ControlPoint"].m_Members.push_back(std::make_pair(name, declString)); + } + else if (psContext->psShader->eShaderType == HULL_SHADER) + { + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + } + else + { + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + } + + HandleInputRedirect(psDecl, BuildOperandTypeString(psOperand->eMinPrecision, INOUT_COMPONENT_FLOAT32, 4)); + break; + } + case OPCODE_DCL_TEMPS: + { + uint32_t i = 0; + const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; + for (i = 0; i < ui32NumTemps; i++) + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i]), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i]), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i]), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i]), i); + if (psShader->psInt16TempSizes[i] != 0) 
+ bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i]), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i]), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i]), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i]), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i]), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i]), i); + } + break; + } + case OPCODE_SPECIAL_DCL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psDecl->asOperands[0].aui32ArraySizes[0], &psCBuf); + ASSERT(psCBuf != NULL); + + if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) + { + // Special case for framebuffer fetch. 
+ char ty = psCBuf->name[20]; + int idx = psCBuf->name[22] - '0'; + + const ShaderVar &sv = psCBuf->asVars[0]; + if (sv.name.substr(0, 15) == "hlslcc_fbinput_") + { + // Pick up the type and index + std::ostringstream oss; + m_NeedFBInputRemapDecl = true; + switch (ty) + { + case 'f': + case 'F': + oss << "float4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + case 'h': + case 'H': + oss << "half4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + case 'i': + case 'I': + oss << "int4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + case 'u': + case 'U': + oss << "uint4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + default: + break; + } + } + // Break out so this doesn't get declared. 
+ break; + } + + DeclareConstantBuffer(psCBuf, psDecl->asOperands[0].aui32ArraySizes[0]); + break; + } + case OPCODE_DCL_RESOURCE: + { + DeclareResource(psDecl); + break; + } + case OPCODE_DCL_OUTPUT: + { + DeclareOutput(psDecl); + break; + } + + case OPCODE_DCL_GLOBAL_FLAGS: + { + uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; + + if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) + { + psShader->sInfo.bEarlyFragmentTests = true; + } + if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) + { + //TODO add precise + //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx + } + if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) + { + // Not supported on Metal +// psShader->fp64 = 1; + } + break; + } + case OPCODE_DCL_THREAD_GROUP: + { + // Send this info to reflecion: Metal gives this at runtime as a param + psContext->m_Reflection.OnThreadGroupSize(psDecl->value.aui32WorkGroupSize[0], + psDecl->value.aui32WorkGroupSize[1], + psDecl->value.aui32WorkGroupSize[2]); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; + if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; + } + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + psContext->psShader->sInfo.eTessDomain = psDecl->value.eTessDomain; + + if (psContext->psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_ISOLINE) + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: domain(\"isoline\") not supported.", 0, true); + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + 
psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + // Not supported + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + // Not supported + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + // Not supported + break; + } + case OPCODE_DCL_INTERFACE: + { + // Are interfaces ever even used? + ASSERT(0); + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + ASSERT(0); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + ASSERT(0); + break; + } + case OPCODE_CUSTOMDATA: + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. + + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "constant float4 ImmCB_%d[%d] =\n{\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) + { + if (!isFirst) + { + bcatcstr(glsl, ",\n"); + } + isFirst = false; + + float val[4] = { + *(float*)&data.a, + *(float*)&data.b, + *(float*)&data.c, + *(float*)&data.d + }; + + bformata(glsl, "\tfloat4("); + for (uint32_t k = 0; k < 4; k++) + { + if (k != 0) + bcatcstr(glsl, ", "); + if (fpcheck(val[k])) + bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k]); + else + HLSLcc::PrintFloat(glsl, val[k]); + } + bcatcstr(glsl, ")"); + }); + bcatcstr(glsl, "\n};\n"); + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + break; // Nothing to do + + case OPCODE_DCL_INDEXABLE_TEMP: + { + const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; + const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; + const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; + bformata(GetEarlyMain(psContext), "float%d TempArray%d[%d];\n", 
ui32RegComponentSize, ui32RegIndex, ui32RegCount); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + const ShaderInfo::InOutSignature* psSignature = NULL; + const char* type = "float"; + uint32_t startReg = 0; + uint32_t i; + bstring *oldString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; + + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + type = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + ASSERT(0); + break; + } + + switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? + { + default: + break; + case MIN_PRECISION_ANY_16: + ASSERT(0); // Wut? 
+ break; + case MIN_PRECISION_FLOAT_16: + case MIN_PRECISION_FLOAT_2_8: + type = "half"; + break; + case MIN_PRECISION_SINT_16: + type = "short"; + break; + case MIN_PRECISION_UINT_16: + type = "ushort"; + break; + } + + startReg = psDecl->asOperands[0].ui32RegisterNumber; + oldString = psContext->currentGLSLString; + psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + psContext->AddIndentation(); + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "%s4 phase%d_%sput%d_%d[%d];\n", type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); + glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + psContext->currentGLSLString = &glsl; + if (isInput == 0) + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + int dummy = 0; + std::string realName; + uint32_t destMask = psDecl->asOperands[0].ui32CompMask; + uint32_t rebase = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + if (regSpace == 0) + if (isInput) + psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); + else + psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); + + ASSERT(psSig != NULL); + + if ((psSig->ui32Mask & destMask) == 0) + continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) + + while ((psSig->ui32Mask & (1 << rebase)) == 0) + rebase++; + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; + + if (isInput) + { + realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 
1, NULL); + + psContext->AddIndentation(); + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + + // for some reason input struct is missed here from GetDeclaredInputName result, so add it manually + bformata(glsl, " = input.%s", realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + } + else + { + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 0); + + psContext->AddIndentation(); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + + bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + } + + bcatcstr(glsl, ";\n"); + } + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; + psContext->currentGLSLString = oldString; + glsl = *psContext->currentGLSLString; + + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + if (regSpace == 0) + { + if (isInput) + 
psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); + ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; + ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; + } + + + break; + } + default: + // TODO Input index ranges. + ASSERT(0); + } + break; + } + + case OPCODE_HS_DECLS: + { + // Not supported + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + psShader->sInfo.ui32TessInputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + else if (psContext->psShader->eShaderType == DOMAIN_SHADER) + psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + break; + } + case OPCODE_HS_FORK_PHASE: + { + // Not supported + break; + } + case OPCODE_HS_JOIN_PHASE: + { + // Not supported + break; + } + case OPCODE_DCL_SAMPLER: + { + std::string name = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + + if (!EmitInlineSampler(psContext, name)) + { + // for some reason we have some samplers start with "sampler" and some not + const bool startsWithSampler = name.find("sampler") == 0; + + std::ostringstream 
samplerOss; + samplerOss << (startsWithSampler ? "" : "sampler") << name; + std::string samplerName = samplerOss.str(); + + if (!psContext->psDependencies->IsMemberDeclared(samplerName)) + { + const uint32_t slot = m_SamplerSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::ostringstream oss; + oss << "sampler " << samplerName << " [[ sampler (" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(samplerName, oss.str())); + + SamplerDesc desc = { name, psDecl->asOperands[0].ui32RegisterNumber, slot }; + m_Samplers.push_back(desc); + } + } + + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + if (psContext->psShader->eShaderType == HULL_SHADER && psContext->psDependencies) + psContext->psDependencies->fMaxTessFactor = psDecl->value.fMaxTessFactor; + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + // A hack to support single component 32bit RWBuffers: Declare as raw buffer. + // TODO: Use textures for RWBuffers when the scripting API has actual format selection etc + // way to flag the created ComputeBuffer as typed. Even then might want to leave this + // hack path for 32bit (u)int typed buffers to continue support atomic ops on those formats. 
+ if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) + { + DeclareBufferVariable(psDecl, true, true); + break; + } + std::string texName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); + std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, false, true); + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); + + std::ostringstream oss; + oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + + HLSLCC_TEX_DIMENSION texDim = TD_INT; + switch (psDecl->value.eResourceDimension) + { + default: break; + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = TD_2D; + break; + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + texDim = TD_2DARRAY; + break; + case RESOURCE_DIMENSION_TEXTURE3D: + texDim = TD_3D; + break; + case RESOURCE_DIMENSION_TEXTURECUBE: + texDim = TD_CUBE; + break; + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = TD_CUBEARRAY; + break; + } + TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, false, false, true}; + m_Textures.push_back(desc); + } + break; + } + + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + DeclareBufferVariable(psDecl, false, true); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + DeclareBufferVariable(psDecl, true, true); + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + DeclareBufferVariable(psDecl, false, false); + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + DeclareBufferVariable(psDecl, true, false); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + std::ostringstream oss; + oss 
<< "uint value[" << psDecl->sTGSM.ui32Stride / 4 << "]"; + m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(std::make_pair("value", oss.str())); + m_StructDefinitions[""].m_Dependencies.push_back(TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"); + oss.str(""); + oss << "threadgroup " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "_Type " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "[" << psDecl->sTGSM.ui32Count << "]"; + + bformata(GetEarlyMain(psContext), "%s;\n", oss.str().c_str()); + psVarType->name = "$Element"; + + psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; + psVarType->Elements = psDecl->sTGSM.ui32Count; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + std::ostringstream oss; + oss << "threadgroup uint " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "[" << (psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride) << "]"; + + bformata(GetEarlyMain(psContext), "%s;\n", oss.str().c_str()); + psVarType->name = "$Element"; + + psVarType->Columns = 1; + psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; + break; + } + + case OPCODE_DCL_STREAM: + { + // Not supported on Metal + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + // Not supported on Metal + break; + } + + default: + ASSERT(0); + break; + } } std::string ToMetal::ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber) { - const ResourceBinding* psBinding = 0; - std::ostringstream oss; - int found; - - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); - - if (found) - { - size_t i = 0; - std::string name = psBinding->name; - uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; - - while (i < name.length()) - { - 
//array syntax [X] becomes _0_ - //Otherwise declarations could end up as: - //uniform sampler2D SomeTextures[0]; - //uniform sampler2D SomeTextures[1]; - if (name[i] == '[' || name[i] == ']') - name[i] = '_'; - - ++i; - } - - if (ui32ArrayOffset) - { - oss << name << ui32ArrayOffset; - return oss.str(); - } - else - { - return name; - } - } - else - { - oss << "UnknownResource" << ui32RegisterNumber; - return oss.str(); - } + const ResourceBinding* psBinding = 0; + std::ostringstream oss; + int found; + + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); + + if (found) + { + size_t i = 0; + std::string name = psBinding->name; + uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; + + while (i < name.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (name[i] == '[' || name[i] == ']') + name[i] = '_'; + + ++i; + } + + if (ui32ArrayOffset) + { + oss << name << ui32ArrayOffset; + return oss.str(); + } + else + { + return name; + } + } + else + { + oss << "UnknownResource" << ui32RegisterNumber; + return oss.str(); + } } void ToMetal::TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim) { - std::string texName = ResourceName(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); - const bool isDepthSampler = (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex); - std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, isDepthSampler, false); + std::string texName = ResourceName(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); + const bool isDepthSampler = (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex); + std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, isDepthSampler, false); bool isMS = false; - 
switch(psDecl->value.eResourceDimension) + switch (psDecl->value.eResourceDimension) { default: break; @@ -2193,284 +2172,279 @@ void ToMetal::TranslateResourceTexture(const Declaration* psDecl, uint32_t sampl break; } - if (!psContext->psDependencies->IsMemberDeclared(texName)) - { - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::ostringstream oss; - oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; + std::ostringstream oss; + oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); - TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, isMS, isDepthSampler, false}; - m_Textures.push_back(desc); + TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, isMS, isDepthSampler, false}; + m_Textures.push_back(desc); - if (isDepthSampler) - EnsureShadowSamplerDeclared(); - } + if (isDepthSampler) + EnsureShadowSamplerDeclared(); + } } void ToMetal::DeclareResource(const Declaration *psDecl) { - switch (psDecl->value.eResourceDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { // Fake single comp 32bit texel buffers by using raw buffer DeclareBufferVariable(psDecl, true, false); break; - // TODO: re-enable this code for buffer textures when sripting API has proper support for it + // TODO: re-enable this code for buffer textures when sripting API has proper support for it #if 0 - if (!psContext->psDependencies->IsMemberDeclared(texName)) - { - uint32_t slot = 
m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); - std::ostringstream oss; - oss << "device " << TranslateResourceDeclaration(psContext, psDecl, texName, false, false); - - oss << texName << " [[ texture(" << slot << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); - psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? - } - break; + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + std::ostringstream oss; + oss << "device " << TranslateResourceDeclaration(psContext, psDecl, texName, false, false); + + oss << texName << " [[ texture(" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? + } + break; #endif - } - default: - ASSERT(0); - break; - - case RESOURCE_DIMENSION_TEXTURE1D: - { - TranslateResourceTexture(psDecl, 1, TD_2D); //TODO: correct HLSLCC_TEX_DIMENSION? - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - TranslateResourceTexture(psDecl, 1, TD_2D); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - TranslateResourceTexture(psDecl, 0, TD_2D); - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - TranslateResourceTexture(psDecl, 0, TD_3D); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - TranslateResourceTexture(psDecl, 1, TD_CUBE); - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - TranslateResourceTexture(psDecl, 1, TD_2DARRAY); //TODO: correct HLSLCC_TEX_DIMENSION? 
- break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - TranslateResourceTexture(psDecl, 1, TD_2DARRAY); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - TranslateResourceTexture(psDecl, 0, TD_2DARRAY); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - TranslateResourceTexture(psDecl, 1, TD_CUBEARRAY); - break; - } - } - psContext->psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; - + } + default: + ASSERT(0); + break; + case RESOURCE_DIMENSION_TEXTURE1D: + { + TranslateResourceTexture(psDecl, 1, TD_2D); //TODO: correct HLSLCC_TEX_DIMENSION? + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + TranslateResourceTexture(psDecl, 1, TD_2D); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + TranslateResourceTexture(psDecl, 0, TD_2D); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateResourceTexture(psDecl, 0, TD_3D); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + TranslateResourceTexture(psDecl, 1, TD_CUBE); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_2DARRAY); //TODO: correct HLSLCC_TEX_DIMENSION? 
+ break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_2DARRAY); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psDecl, 0, TD_2DARRAY); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_CUBEARRAY); + break; + } + } + psContext->psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; } void ToMetal::DeclareOutput(const Declaration *psDecl) { - Shader* psShader = psContext->psShader; - - if (!psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) - return; - - const Operand* psOperand = &psDecl->asOperands[0]; - int iNumComponents; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - - const ShaderInfo::InOutSignature* psSignature = NULL; - SHADER_VARIABLE_TYPE cType = SVT_VOID; - - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH || - psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || - psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) - { - iNumComponents = 1; - cType = SVT_FLOAT; - } - else - { - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister( - ui32Reg, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); - - iNumComponents = HLSLcc::GetNumberBitsSet(psSignature->ui32Mask); - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - cType = SVT_UINT; - break; - } - case INOUT_COMPONENT_SINT32: - { - cType = SVT_INT; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - cType = SVT_FLOAT; - break; - } - default: - ASSERT(0); - break; - } - // Don't set this for oDepth (or variants), because depth output register is in separate space from other outputs (regno 0, but others 
may overlap with that) - if (iNumComponents == 1) - psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; - - switch (psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - cType = SVT_FLOAT16; - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - cType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_SINT_16: - cType = SVT_INT16; - break; - case OPERAND_MIN_PRECISION_UINT_16: - cType = SVT_UINT16; - break; - } - } - - std::string type = HLSLcc::GetConstructorForTypeMetal(cType, iNumComponents); - std::string name = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], nullptr, nullptr, nullptr, 1); - - switch (psShader->eShaderType) - { - case PIXEL_SHADER: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - std::ostringstream oss; - oss << type << " " << name << " [[ sample_mask ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH: - { - std::ostringstream oss; - oss << type << " " << name << " [[ depth(any) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - { - std::ostringstream oss; - oss << type << " " << name << " [[ depth(greater) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - std::ostringstream oss; - oss << type << " " << name << " [[ depth(less) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - default: - { - std::ostringstream oss; - oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]"; - m_NeedFBOutputRemapDecl = true; - 
m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - } - } - break; - } - case VERTEX_SHADER: - case DOMAIN_SHADER: - case HULL_SHADER: - { - std::string out = GetOutputStructName(); - bool isTessKernel = (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0 && (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == VERTEX_SHADER); - - std::ostringstream oss; - oss << type << " " << name; - if (!isTessKernel && (psSignature->eSystemValueType == NAME_POSITION || psSignature->semanticName == "POS") && psOperand->ui32RegisterNumber == 0) - oss << " [[ position ]]"; - else if (!isTessKernel && psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0 ) - oss << " [[ point_size ]]"; - else - oss << " [[ user(" << name << ") ]]"; - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); - - // For preserving data layout, declare output struct as domain shader input, too - if (psContext->psShader->eShaderType == HULL_SHADER) - { - out += "In"; - - std::ostringstream oss; - oss << type << " " << name; - - // VERTEX_SHADER hardcoded on purpose - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); - oss << " [[ " << "attribute(" << loc << ")" << " ]] "; - - psContext->m_Reflection.OnInputBinding(name, loc); - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); - } - break; - } - case GEOMETRY_SHADER: - default: - ASSERT(0); - break; - - } - HandleOutputRedirect(psDecl, HLSLcc::GetConstructorForTypeMetal(cType, 4)); - - + Shader* psShader = psContext->psShader; + + if (!psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) + return; + + const Operand* psOperand = &psDecl->asOperands[0]; + int iNumComponents; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; 
+ + const ShaderInfo::InOutSignature* psSignature = NULL; + SHADER_VARIABLE_TYPE cType = SVT_VOID; + + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + iNumComponents = 1; + cType = SVT_FLOAT; + } + else + { + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + ui32Reg, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); + + iNumComponents = HLSLcc::GetNumberBitsSet(psSignature->ui32Mask); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + cType = SVT_UINT; + break; + } + case INOUT_COMPONENT_SINT32: + { + cType = SVT_INT; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + cType = SVT_FLOAT; + break; + } + default: + ASSERT(0); + break; + } + // Don't set this for oDepth (or variants), because depth output register is in separate space from other outputs (regno 0, but others may overlap with that) + if (iNumComponents == 1) + psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; + + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + cType = SVT_FLOAT16; + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + cType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_SINT_16: + cType = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + cType = SVT_UINT16; + break; + } + } + + std::string type = HLSLcc::GetConstructorForTypeMetal(cType, iNumComponents); + std::string name = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], nullptr, nullptr, nullptr, 1); + + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + switch (psDecl->asOperands[0].eType) + { + case 
OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + std::ostringstream oss; + oss << type << " " << name << " [[ sample_mask ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(any) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(greater) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(less) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + default: + { + std::ostringstream oss; + oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]"; + m_NeedFBOutputRemapDecl = true; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + } + } + break; + } + case VERTEX_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: + { + std::string out = GetOutputStructName(); + bool isTessKernel = (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0 && (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == VERTEX_SHADER); + + std::ostringstream oss; + oss << type << " " << name; + if (!isTessKernel && (psSignature->eSystemValueType == NAME_POSITION || psSignature->semanticName == "POS") && psOperand->ui32RegisterNumber == 0) + oss << " [[ position ]]"; + else if (!isTessKernel && psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0) + oss << " [[ point_size ]]"; + else + oss << " [[ user(" << name 
<< ") ]]"; + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + + // For preserving data layout, declare output struct as domain shader input, too + if (psContext->psShader->eShaderType == HULL_SHADER) + { + out += "In"; + + std::ostringstream oss; + oss << type << " " << name; + + // VERTEX_SHADER hardcoded on purpose + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true); + oss << " [[ " << "attribute(" << loc << ")" << " ]] "; + + psContext->m_Reflection.OnInputBinding(name, loc); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + } + break; + } + case GEOMETRY_SHADER: + default: + ASSERT(0); + break; + } + HandleOutputRedirect(psDecl, HLSLcc::GetConstructorForTypeMetal(cType, 4)); } void ToMetal::EnsureShadowSamplerDeclared() { - if (m_ShadowSamplerDeclared) - return; - - if((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER)) - m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::linear, compare_func::greater_equal);\n"; - else - m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::nearest, compare_func::greater_equal);\n"; - m_ShadowSamplerDeclared = true; + if (m_ShadowSamplerDeclared) + return; + + if ((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER)) + m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::linear, compare_func::greater_equal);\n"; + else + m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::nearest, compare_func::greater_equal);\n"; + m_ShadowSamplerDeclared = true; } diff --git a/src/toMetalInstruction.cpp b/src/toMetalInstruction.cpp index d5a60b5..f00fe7b 100644 --- a/src/toMetalInstruction.cpp +++ 
b/src/toMetalInstruction.cpp @@ -14,10 +14,10 @@ using namespace HLSLcc; -bstring operator << (bstring a, const std::string &b) +bstring operator<<(bstring a, const std::string &b) { - bcatcstr(a, b.c_str()); - return a; + bcatcstr(a, b.c_str()); + return a; } // This function prints out the destination name, possible destination writemask, assignment operator @@ -26,3906 +26,3909 @@ bstring operator << (bstring a, const std::string &b) // and pSrcCount will be filled with the number of components expected // ui32CompMask can be used to only write to 1 or more components (used by MOVC) void ToMetal::AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, const char *szAssignmentOp, int *pNeedsParenthesis, uint32_t ui32CompMask) { - uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); - bstring glsl = *psContext->currentGLSLString; - SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); - ASSERT(pNeedsParenthesis != NULL); - - *pNeedsParenthesis = 0; - - glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); - - // Simple path: types match. - if (eDestDataType == eSrcType) - { - // Cover cases where the HLSL language expects the rest of the components to be default-filled - // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - *pNeedsParenthesis = 1; - } - else - bformata(glsl, " %s ", szAssignmentOp); - return; - } - // Up/downscaling with cast. 
The monster of condition there checks if the underlying datatypes are the same, just with prec differences - if (((eDestDataType == SVT_FLOAT || eDestDataType == SVT_FLOAT16 || eDestDataType == SVT_FLOAT10) && (eSrcType == SVT_FLOAT || eSrcType == SVT_FLOAT16 || eSrcType == SVT_FLOAT10)) - || ((eDestDataType == SVT_INT || eDestDataType == SVT_INT16 || eDestDataType == SVT_INT12) && (eSrcType == SVT_INT || eSrcType == SVT_INT16 || eSrcType == SVT_INT12)) - || ((eDestDataType == SVT_UINT || eDestDataType == SVT_UINT16) && (eSrcType == SVT_UINT || eSrcType == SVT_UINT16))) - { - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - *pNeedsParenthesis = 1; - return; - } - - switch (eDestDataType) - { - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - // Bitcasts from lower precisions are ambiguous - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT) - { - if(ui32DestElementCount > 1) - bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); - else - bformata(glsl, " %s as_type(", szAssignmentOp); - - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - - (*pNeedsParenthesis)++; - break; - case SVT_UINT: - case SVT_UINT16: - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT) - { - if (ui32DestElementCount > 1) - bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); - else - bformata(glsl, " %s as_type(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - 
bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - - (*pNeedsParenthesis)++; - break; - - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); - if (psContext->psShader->ui32MajorVersion > 3) - { - if (ui32DestElementCount > 1) - bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); - else - bformata(glsl, " %s as_type(", szAssignmentOp); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); - - (*pNeedsParenthesis)++; - break; - default: - // TODO: Handle bools? - ASSERT(0); - break; - } - return; + uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); + ASSERT(pNeedsParenthesis != NULL); + + *pNeedsParenthesis = 0; + + glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + + // Simple path: types match. + if (eDestDataType == eSrcType) + { + // Cover cases where the HLSL language expects the rest of the components to be default-filled + // eg. MOV r0, c0.x => Temp[0] = vec4(c0.x); + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + *pNeedsParenthesis = 1; + } + else + bformata(glsl, " %s ", szAssignmentOp); + return; + } + // Up/downscaling with cast. 
The monster of condition there checks if the underlying datatypes are the same, just with prec differences + if (((eDestDataType == SVT_FLOAT || eDestDataType == SVT_FLOAT16 || eDestDataType == SVT_FLOAT10) && (eSrcType == SVT_FLOAT || eSrcType == SVT_FLOAT16 || eSrcType == SVT_FLOAT10)) + || ((eDestDataType == SVT_INT || eDestDataType == SVT_INT16 || eDestDataType == SVT_INT12) && (eSrcType == SVT_INT || eSrcType == SVT_INT16 || eSrcType == SVT_INT12)) + || ((eDestDataType == SVT_UINT || eDestDataType == SVT_UINT16) && (eSrcType == SVT_UINT || eSrcType == SVT_UINT16))) + { + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + *pNeedsParenthesis = 1; + return; + } + + switch (eDestDataType) + { + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + // Bitcasts from lower precisions are ambiguous + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT) + { + if (ui32DestElementCount > 1) + bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); + else + bformata(glsl, " %s as_type(", szAssignmentOp); + + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + + (*pNeedsParenthesis)++; + break; + case SVT_UINT: + case SVT_UINT16: + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT) + { + if (ui32DestElementCount > 1) + bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); + else + bformata(glsl, " %s as_type(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + 
bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + + (*pNeedsParenthesis)++; + break; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); + if (psContext->psShader->ui32MajorVersion > 3) + { + if (ui32DestElementCount > 1) + bformata(glsl, " %s as_type(", szAssignmentOp, ui32DestElementCount); + else + bformata(glsl, " %s as_type(", szAssignmentOp); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s %s(", szAssignmentOp, GetConstructorForTypeMetal(eDestDataType, ui32DestElementCount)); + + (*pNeedsParenthesis)++; + break; + default: + // TODO: Handle bools? 
+ ASSERT(0); + break; + } } void ToMetal::AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, int* pNeedsParenthesis) { - AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, "=", pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); } void ToMetal::AddAssignPrologue(int numParenthesis) { - bstring glsl = *psContext->currentGLSLString; - while (numParenthesis != 0) - { - bcatcstr(glsl, ")"); - numParenthesis--; - } - bcatcstr(glsl, ";\n"); - + bstring glsl = *psContext->currentGLSLString; + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } + bcatcstr(glsl, ";\n"); } void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag) + uint32_t typeFlag) { - // Multiple cases to consider here: - // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. 
typeflag: TO_FLAG_NONE - // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER - // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER - // - - - bstring glsl = *psContext->currentGLSLString; - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); - int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; - const uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - - int needsParenthesis = 0; - if (typeFlag == TO_FLAG_NONE - && CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - typeFlag = TO_FLAG_FORCE_HALF; - ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); - if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) - { - // Set the proper auto-expand flag is either argument is scalar - typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::min(std::max(s0ElemCount, s1ElemCount), destElemCount) - 2)); - } - if (destElemCount > 1) - { - const char* glslOpcode[] = { - "==", - "<", - ">=", - "!=", - }; - psContext->AddIndentation(); - if (isBoolDest) - { - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); - - bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, destElemCount)); - bcatcstr(glsl, "("); - } - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); - bformata(glsl, "%s", glslOpcode[eType]); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); - bcatcstr(glsl, ")"); - if (!isBoolDest) - { - bcatcstr(glsl, ")"); - bcatcstr(glsl, " * 0xFFFFFFFFu"); - } - - 
AddAssignPrologue(needsParenthesis); - } - else - { - const char* glslOpcode[] = { - "==", - "<", - ">=", - "!=", - }; - - //Scalar compare - - psContext->AddIndentation(); - if (isBoolDest) - { - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); - bcatcstr(glsl, "("); - } - glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); - bformata(glsl, "%s", glslOpcode[eType]); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); - if (!isBoolDest) - { - bcatcstr(glsl, ") ? 0xFFFFFFFFu : 0u"); - } - AddAssignPrologue(needsParenthesis); - } + // Multiple cases to consider here: + // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. typeflag: TO_FLAG_NONE + // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER + // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER + // + + + bstring glsl = *psContext->currentGLSLString; + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); + int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + const uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + if (typeFlag == TO_FLAG_NONE + && CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + typeFlag = TO_FLAG_FORCE_HALF; + ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); + if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) + { + // Set the proper auto-expand flag is either argument is scalar + typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::min(std::max(s0ElemCount, 
s1ElemCount), destElemCount) - 2)); + } + if (destElemCount > 1) + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + psContext->AddIndentation(); + if (isBoolDest) + { + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); + + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, destElemCount)); + bcatcstr(glsl, "("); + } + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); + bformata(glsl, "%s", glslOpcode[eType]); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); + bcatcstr(glsl, ")"); + if (!isBoolDest) + { + bcatcstr(glsl, ")"); + bcatcstr(glsl, " * 0xFFFFFFFFu"); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + + //Scalar compare + + psContext->AddIndentation(); + if (isBoolDest) + { + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, &needsParenthesis); + bcatcstr(glsl, "("); + } + glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); + bformata(glsl, "%s", glslOpcode[eType]); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); + if (!isBoolDest) + { + bcatcstr(glsl, ") ? 
0xFFFFFFFFu : uint(0)"); + } + AddAssignPrologue(needsParenthesis); + } } bool ToMetal::CanForceToHalfOperand(const Operand *psOperand) { - if (psOperand->GetDataType(psContext) == SVT_FLOAT16) - return true; - - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) - { - for (int i = 0; i < psOperand->iNumComponents; i++) - { - float val = fabs(psOperand->afImmediates[i]); - // Do not allow forcing immediate value to half if value is beyond half min/max boundaries - if (val != 0 && (val > 65504 || val < 6.10352e-5)) - return false; - } - return true; - } - - return false; + if (psOperand->GetDataType(psContext) == SVT_FLOAT16) + return true; + + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) + { + for (int i = 0; i < psOperand->iNumComponents; i++) + { + float val = fabs(psOperand->afImmediates[i]); + // Do not allow forcing immediate value to half if value is beyond half min/max boundaries + if (val != 0 && (val > 65504 || val < 6.10352e-5)) + return false; + } + return true; + } + + return false; } void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc) { - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - int srcSwizzleCount = pSrc->GetNumSwizzleElements(); - uint32_t writeMask = pDest->GetAccessMask(); + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int srcSwizzleCount = pSrc->GetNumSwizzleElements(); + uint32_t writeMask = pDest->GetAccessMask(); - const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); - uint32_t flags = SVTTypeToFlag(eSrcType); + const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); + uint32_t flags = SVTTypeToFlag(eSrcType); - AddAssignToDest(pDest, eSrcType, srcSwizzleCount, &numParenthesis); - glsl << TranslateOperand(pSrc, flags, writeMask); + AddAssignToDest(pDest, 
eSrcType, srcSwizzleCount, &numParenthesis); + glsl << TranslateOperand(pSrc, flags, writeMask); - AddAssignPrologue(numParenthesis); + AddAssignPrologue(numParenthesis); } void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2) { - bstring glsl = *psContext->currentGLSLString; - uint32_t destElemCount = pDest->GetNumSwizzleElements(); - uint32_t s0ElemCount = src0->GetNumSwizzleElements(); - uint32_t s1ElemCount = src1->GetNumSwizzleElements(); - uint32_t s2ElemCount = src2->GetNumSwizzleElements(); - uint32_t destWriteMask = pDest->GetAccessMask(); - uint32_t destElem; - - const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); - /* - for each component in dest[.mask] - if the corresponding component in src0 (POS-swizzle) - has any bit set - { - copy this component (POS-swizzle) from src1 into dest - } - else - { - copy this component (POS-swizzle) from src2 into dest - } - endfor - */ - - /* Single-component conditional variable (src0) */ - if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) - { - int numParenthesis = 0; - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - psContext->AddIndentation(); - AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); - bcatcstr(glsl, "("); - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); - else if (s0Type == SVT_BOOL) - glsl << TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); - else - glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - bcatcstr(glsl, " != 0u) ? "); - else if (s0Type == SVT_BOOL) - bcatcstr(glsl, ") ? "); - else - bcatcstr(glsl, " != 0) ? 
"); - } - - if (s1ElemCount == 1 && destElemCount > 1) - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); - - bcatcstr(glsl, " : "); - if (s2ElemCount == 1 && destElemCount > 1) - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); - - AddAssignPrologue(numParenthesis); - } - else - { - // TODO: We can actually do this in one op using mix(). - int srcElem = -1; - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - - // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations - // might alter the source before all components are handled. - const char* tempName = "hlslcc_movcTemp"; - bool dstIsSrc1 = (pDest->eType == src1->eType) && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); - bool dstIsSrc2 = (pDest->eType == src2->eType) && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); - - if (dstIsSrc1 || dstIsSrc2) - { - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); + bstring glsl = *psContext->currentGLSLString; + uint32_t destElemCount = pDest->GetNumSwizzleElements(); + uint32_t s0ElemCount = src0->GetNumSwizzleElements(); + uint32_t s1ElemCount = src1->GetNumSwizzleElements(); + uint32_t s2ElemCount = src2->GetNumSwizzleElements(); + uint32_t destWriteMask = pDest->GetAccessMask(); + uint32_t destElem; + + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + /* + for each component in dest[.mask] + if the corresponding component in src0 (POS-swizzle) + has any bit set + { + copy this component (POS-swizzle) from src1 into dest + } + else + { + copy this component (POS-swizzle) from src2 into dest + } + endfor + */ + + /* Single-component conditional variable (src0) 
*/ + if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) + { + int numParenthesis = 0; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + psContext->AddIndentation(); + AddAssignToDest(pDest, eDestType, destElemCount, &numParenthesis); + bcatcstr(glsl, "("); + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); + else if (s0Type == SVT_BOOL) + glsl << TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); + else + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + bcatcstr(glsl, " != uint(0)) ? "); + else if (s0Type == SVT_BOOL) + bcatcstr(glsl, ") ? "); + else + bcatcstr(glsl, " != 0) ? "); + } + + if (s1ElemCount == 1 && destElemCount > 1) + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); + + bcatcstr(glsl, " : "); + if (s2ElemCount == 1 && destElemCount > 1) + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); + + AddAssignPrologue(numParenthesis); + } + else + { + // TODO: We can actually do this in one op using mix(). + int srcElem = -1; + SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + + // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations + // might alter the source before all components are handled. 
+ const std::string tempName = "hlslcc_movcTemp"; + bool dstIsSrc1 = (pDest->eType == src1->eType) + && (dstType == src1->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); + bool dstIsSrc2 = (pDest->eType == src2->eType) + && (dstType == src2->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); + + if (dstIsSrc1 || dstIsSrc2) + { + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : pDest->iNumComponents; - bformata(glsl, "%s %s = %s;\n", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName, TranslateOperand(pDest, TO_FLAG_NAME_ONLY).c_str()); - } - - for (destElem = 0; destElem < 4; ++destElem) - { - int numParenthesis = 0; - srcElem++; - if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) - continue; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); - bcatcstr(glsl, "("); - if (s0Type == SVT_BOOL) - { - glsl << TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); - bcatcstr(glsl, ") ? "); - } - else - { - glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - bcatcstr(glsl, " != 0) ? 
"); - } - } - - if (!dstIsSrc1) - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); - else - bformata(glsl, "%s%s", tempName, TranslateOperandSwizzle(src1, 1 << srcElem, 0).c_str()); - - bcatcstr(glsl, " : "); - - if (!dstIsSrc2) - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); - else - bformata(glsl, "%s%s", tempName, TranslateOperandSwizzle(src2, 1 << srcElem, 0).c_str()); - - AddAssignPrologue(numParenthesis); - } - - if (dstIsSrc1 || dstIsSrc2) - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - } -} + bformata(glsl, "%s %s = %s;\n", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName.c_str(), TranslateOperand(pDest, TO_FLAG_NAME_ONLY).c_str()); -void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType) -{ - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int needsParenthesis = 0; - - if (eDataType == SVT_FLOAT - && CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1])) - { - ui32Flags = TO_FLAG_FORCE_HALF; - eDataType = SVT_FLOAT16; - } - - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - if (src1SwizCount != src0SwizCount) - { - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); - -/* bool s0NeedsUpscaling = false, s1NeedsUpscaling = false; - SHADER_VARIABLE_TYPE s0Type = 
psInst->asOperands[src0].GetDataType(psContext); - SHADER_VARIABLE_TYPE s1Type = psInst->asOperands[src1].GetDataType(psContext); - - if((s0Type == SVT_FLOAT10 || s0Type == SVT_FLOAT16) && (s1Type != s) - */ - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", name); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - - AddAssignPrologue(needsParenthesis); -} + // Override OPERAND_TYPE_TEMP name temporarily + const_cast(pDest)->specialName.assign(tempName); + } + for (destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); + bcatcstr(glsl, "("); + if (s0Type == SVT_BOOL) + { + glsl << TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, ") ? "); + } + else + { + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + bcatcstr(glsl, " != 0) ? 
"); + } + } + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + bcatcstr(glsl, " : "); + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + AddAssignPrologue(numParenthesis); + } -void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - uint32_t ui32Flags = dataType; - int numParenthesis = 0; - - if (dataType == TO_FLAG_NONE - && CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1]) - && CanForceToHalfOperand(&psInst->asOperands[src2])) - ui32Flags = dataType = TO_FLAG_FORCE_HALF; - - if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); - - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", op1); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bformata(glsl, " %s ", op2); - glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} + if (dstIsSrc1 || dstIsSrc2) + { + const_cast(pDest)->specialName.clear(); -void ToMetal::CallHelper3(const char* name, 
Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if (CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1]) - && CanForceToHalfOperand(&psInst->asOperands[src2])) - ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; - - if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? 
SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} + psContext->AddIndentation(); + glsl << TranslateOperand(pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, " = %s;\n", tempName.c_str()); -void ToMetal::CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) -{ - CallHelper3(name, psInst, dest, src0, src1, src2, paramsShouldFollowWriteMask, TO_AUTO_BITCAST_TO_FLOAT); + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + } } -void ToMetal::CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) +void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 
1 : 0; - int numParenthesis = 0; - - if (CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1])) - ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int needsParenthesis = 0; + + if (eDataType == SVT_FLOAT + && CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1])) + { + ui32Flags = TO_FLAG_FORCE_HALF; + eDataType = SVT_FLOAT16; + } + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + if (src1SwizCount != src0SwizCount) + { + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } + psContext->AddIndentation(); - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 
1 : dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); - bformata(glsl, "%s(", name); - numParenthesis++; +/* bool s0NeedsUpscaling = false, s1NeedsUpscaling = false; + SHADER_VARIABLE_TYPE s0Type = psInst->asOperands[src0].GetDataType(psContext); + SHADER_VARIABLE_TYPE s1Type = psInst->asOperands[src1].GetDataType(psContext); - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + if((s0Type == SVT_FLOAT10 || s0Type == SVT_FLOAT16) && (s1Type != s) + */ + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", name); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + AddAssignPrologue(needsParenthesis); } -void ToMetal::CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) +void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + uint32_t ui32Flags = dataType; + int numParenthesis = 0; + + if (dataType == TO_FLAG_NONE + && CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1]) + && CanForceToHalfOperand(&psInst->asOperands[src2])) + ui32Flags = dataType = TO_FLAG_FORCE_HALF; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } -void 
ToMetal::CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} + psContext->AddIndentation(); -void ToMetal::CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; - - psContext->AddIndentation(); - if (CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0])) - ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; - - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? 
SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, &numParenthesis); + + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", op1); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bformata(glsl, " %s ", op2); + glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } -//Result is an int. -void ToMetal::CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask) +void ToMetal::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags) { - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1]) + && CanForceToHalfOperand(&psInst->asOperands[src2])) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } - psContext->AddIndentation(); + psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? 
SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } -void ToMetal::TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) +void ToMetal::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) { - int numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ".read("); - - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - 
bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - // Shouldn't happen. 
Cubemap reads are not supported in HLSL - ASSERT(0); - break; - } - } - bcatcstr(glsl, ")"); - - glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); + CallHelper3(name, psInst, dest, src0, src1, src2, paramsShouldFollowWriteMask, TO_AUTO_BITCAST_TO_FLOAT); } -void ToMetal::TranslateTexelFetchOffset( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) +void ToMetal::CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) { - int numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); - - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ".read("); - - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bformata(glsl, " + %d", psInst->iUAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bformata(glsl, " + %d, ", psInst->iUAddrOffset); - - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Y); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 
/* .xy */); - bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bformata(glsl, "+ ivec3(%d, %d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - // Shouldn't happen. 
Cubemap reads are not supported in HLSL - ASSERT(0); - break; - } - } - bcatcstr(glsl, ")"); - - glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); -} + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; + int numParenthesis = 0; -//Makes sure the texture coordinate swizzle is appropriate for the texture type. -//i.e. vecX for X-dimension texture. -//Currently supports floating point coord only, so not used for texelFetch. -void ToMetal::TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand) -{ - uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; - bool isArray = false; - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - //Vec1 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - // x for coord, y for array element - opMask = OPERAND_4_COMPONENT_MASK_X; - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - bcatcstr(glsl, ", round("); - - opMask = OPERAND_4_COMPONENT_MASK_Y; - flags = TO_AUTO_BITCAST_TO_FLOAT; - isArray = true; - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - //Vec2 texcoord. Mask out the other components. 
- opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE3D: - { - //Vec3 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - // xy for coord, z for array element - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - bcatcstr(glsl, ", round("); - - opMask = OPERAND_4_COMPONENT_MASK_Z; - flags = TO_AUTO_BITCAST_TO_FLOAT; - isArray = true; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - // xyz for coord, w for array element - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - bcatcstr(glsl, ", round("); - - opMask = OPERAND_4_COMPONENT_MASK_W; - flags = TO_AUTO_BITCAST_TO_FLOAT; - isArray = true; - break; - } - default: - { - ASSERT(0); - break; - } - } - - //FIXME detect when integer coords are needed. 
- bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - if (isArray) - bcatcstr(glsl, ")"); + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1])) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; -} -void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); - - const char *metalGetters[] = { ".get_width(", ".get_height(", ".get_depth(", ".get_num_mip_levels()" }; - int dim = GetNumTextureDimensions(psInst->eResDim); - if (dim < (index + 1) && index != 3) - { - bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
"0u" : "0.0"); - } - else - { - if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT) - { - bcatcstr(glsl, "float("); - numParenthesis++; - } - else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) - { - bcatcstr(glsl, "1.0f / float("); - numParenthesis++; - } - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NAME_ONLY); - if ((index == 1 && psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY) || - (index == 2 && (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY || - psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY))) - { - bcatcstr(glsl, ".get_array_size()"); - } - else - { - bcatcstr(glsl, metalGetters[index]); - - if (index < 3) - { - if (psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMS && - psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMSARRAY) - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); //mip level - - bcatcstr(glsl, ")"); - } - } - } - AddAssignPrologue(numParenthesis); -} + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } -void ToMetal::TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; - - Operand* psDest = &psInst->asOperands[0]; - Operand* psDestAddr = &psInst->asOperands[1]; - Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; - Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; - Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; - Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; - Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; - Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? 
&psInst->asOperands[4] : 0; - Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; - Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; - - const char *funcName = ""; - const char* gradSwizzle = ""; - const char *gradientName = ""; - - uint32_t ui32NumOffsets = 0; - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; - - if (ui32Flags & TEXSMP_FLAG_GATHER) - { - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - funcName = "gather_compare"; - else - funcName = "gather"; - } - else - { - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - funcName = "sample_compare"; - else - funcName = "sample"; - } - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - gradSwizzle = ".x"; - ui32NumOffsets = 1; - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - gradSwizzle = ".xy"; - gradientName = "gradient2d"; - ui32NumOffsets = 2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - gradientName = "gradientcube"; - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - gradientName = "gradient3d"; - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - gradSwizzle = ".x"; - ui32NumOffsets = 1; - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - gradSwizzle = ".xy"; - ui32NumOffsets = 2; - gradientName = "gradient2d"; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - gradientName = "gradientcube"; - break; - } - default: - { - ASSERT(0); - break; - } - } - - - SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); - psContext->AddIndentation(); - AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); - - std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); - - // TextureName.FuncName( - glsl << texName; 
- bformata(glsl, ".%s(", funcName); - - bool isDepthSampler = false; - for(unsigned j = 0, m = m_Textures.size() ; j < m ; ++j) - { - if(m_Textures[j].name == texName) - { - isDepthSampler = m_Textures[j].isDepthSampler; - break; - } - } - - // Sampler name - //TODO: Is it ok to use fixed shadow sampler in all cases of depth compare or would we need more - // accurate way of detecting shadow cases (atm all depth compares are interpreted as shadow usage) - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - bcatcstr(glsl, "_mtl_xl_shadow_sampler"); - } - else - { - std::string sampName = TranslateOperand(psSrcSamp, TO_FLAG_NAME_ONLY); - - // insert the "sampler" prefix if the sampler name is equal to the texture name (default sampler) - if (texName == sampName) - sampName.insert(0, "sampler"); - glsl << sampName; - } - - bcatcstr(glsl, ", "); - - // Texture coordinates - TranslateTexCoord(eResDim, psDestAddr); - - // Depth compare reference value - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - bcatcstr(glsl, ", saturate("); // TODO: why the saturate here? 
- glsl << TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - } - - // lod_options (LOD/grad/bias) based on the flags - if (ui32Flags & TEXSMP_FLAG_LOD) - { - bcatcstr(glsl, ", level("); - glsl << TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); - if (psContext->psShader->ui32MajorVersion < 4) - { - bcatcstr(glsl, ".w"); - } - bcatcstr(glsl, ")"); - } - else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) - { - bcatcstr(glsl, ", level(0.0)"); - } - else if (ui32Flags & TEXSMP_FLAG_GRAD) - { - glsl << std::string(", ") << std::string(gradientName) << std::string("(float4("); - glsl << TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - bcatcstr(glsl, ", float4("); - glsl << TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - bcatcstr(glsl, ")"); - } - else if (ui32Flags & (TEXSMP_FLAG_BIAS)) - { - glsl << std::string(", bias(") << TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT) << std::string(")"); - } - - bool hadOffset = false; - - // Add offset param - if (psInst->bAddressOffset) - { - hadOffset = true; - if (ui32NumOffsets == 1) - { - bformata(glsl, ", %d", - psInst->iUAddrOffset); - } - else - if (ui32NumOffsets == 2) - { - bformata(glsl, ", int2(%d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset); - } - else - if (ui32NumOffsets == 3) - { - bformata(glsl, ", int3(%d, %d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset, - psInst->iWAddrOffset); - } - } - // HLSL gather has a variant with separate offset operand - else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) - { - hadOffset = true; - uint32_t mask = OPERAND_4_COMPONENT_MASK_X; - if (ui32NumOffsets > 1) - mask |= OPERAND_4_COMPONENT_MASK_Y; - if (ui32NumOffsets > 2) - mask |= OPERAND_4_COMPONENT_MASK_Z; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 
1 : dstSwizCount, &numParenthesis); - bcatcstr(glsl, ","); - glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); - } + bformata(glsl, "%s(", name); + numParenthesis++; - // Add texture gather component selection if needed - if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) - { - ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); - if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) - { - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) - { - // Need to add offset param to match func overload - if (!hadOffset) - { - if (ui32NumOffsets == 1) - bcatcstr(glsl, ", 0"); - else - bformata(glsl, ", int%d(0)", ui32NumOffsets); - } + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bcatcstr(glsl, ", component::"); - glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false); - } - else - { - psContext->m_Reflection.OnDiagnostics("Metal supports gather compare only for the first component.", 0, true); - } - } + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); } - bcatcstr(glsl, ")"); + psContext->AddIndentation(); - if (!((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || isDepthSampler) || (ui32Flags & TEXSMP_FLAG_GATHER)) - { - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. - psSrcTex->iWriteMaskEnabled = 1; - glsl << TranslateOperandSwizzle(psSrcTex, psDest->GetAccessMask(), 0); - } - AddAssignPrologue(numParenthesis); -} + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} -// Handle cases where vector components are accessed with dynamic index ([] notation). -// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting -// the offset back to vector component index in runtime => calculating stuff back and forth. -// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
-void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) +void ToMetal::CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) { - bstring glsl = *psContext->currentGLSLString; - ASSERT(psVarType->Class == SVC_VECTOR); - - bcatcstr(glsl, "["); // Access vector component with [] notation - if (offset > 0) - bcatcstr(glsl, "("); + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } - // The var containing byte address to the requested element - glsl << TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + psContext->AddIndentation(); - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %du)", offset); // Subtract that first + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, &numParenthesis); - bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } -void 
ToMetal::TranslateShaderStorageStore(Instruction* psInst) +void ToMetal::CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask) { - bstring glsl = *psContext->currentGLSLString; - int component; - int srcComponent = 0; - - Operand* psDest = 0; - Operand* psDestAddr = 0; - Operand* psDestByteOff = 0; - Operand* psSrc = 0; - - - switch (psInst->eOpcode) - { - case OPCODE_STORE_STRUCTURED: - psDest = &psInst->asOperands[0]; - psDestAddr = &psInst->asOperands[1]; - psDestByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - - break; - case OPCODE_STORE_RAW: - case OPCODE_STORE_UAV_TYPED: // Hack typed buffer as raw buf - psDest = &psInst->asOperands[0]; - psDestByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); - if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) - dstOffFlag = TO_FLAG_INTEGER; - - for (component = 0; component < 4; component++) - { - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - if (psInst->asOperands[0].ui32CompMask & (1 << component)) - { - psContext->AddIndentation(); - glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - - if (psDestAddr) - { - bcatcstr(glsl, "["); - glsl << TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, "].value"); - } - - bcatcstr(glsl, "[("); - glsl << TranslateOperand(psDestByteOff, dstOffFlag); - if (psInst->eOpcode == OPCODE_STORE_UAV_TYPED) - { - bcatcstr(glsl, ")"); - } - else - { - bcatcstr(glsl, " >> 2"); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - bcatcstr(glsl, ")"); - - if (component != 0) - { - bformata(glsl, " + %d", component); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - } - 
bcatcstr(glsl, "]"); - - //Dest type is currently always a uint array. - bcatcstr(glsl, " = "); - if (psSrc->GetNumSwizzleElements() > 1) - glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++)); - else - glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - - bformata(glsl, ";\n"); - } - } + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0])) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } -void ToMetal::TranslateShaderStorageLoad(Instruction* psInst) +//Result is an int. 
+void ToMetal::CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask) { - bstring glsl = *psContext->currentGLSLString; - int component; - Operand* psDest = 0; - Operand* psSrcAddr = 0; - Operand* psSrcByteOff = 0; - Operand* psSrc = 0; - - switch (psInst->eOpcode) - { - case OPCODE_LD_STRUCTURED: - psDest = &psInst->asOperands[0]; - psSrcAddr = &psInst->asOperands[1]; - psSrcByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - break; - case OPCODE_LD_RAW: - case OPCODE_LD_UAV_TYPED: // Hack typed buffer as raw buf - psDest = &psInst->asOperands[0]; - psSrcByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t destCount = psDest->GetNumSwizzleElements(); - uint32_t destMask = psDest->GetAccessMask(); - - int numParenthesis = 0; - int firstItemAdded = 0; - SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); - uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); - if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) - srcOffFlag = TO_FLAG_INTEGER; - - psContext->AddIndentation(); - AddAssignToDest(psDest, destDataType, destCount, &numParenthesis); - if (destCount > 1) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(destDataType, destCount)); - numParenthesis++; - } - for (component = 0; component < 4; component++) - { - bool addedBitcast = false; - if (!(destMask & (1 << component))) - continue; - - if (firstItemAdded) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; - - // always uint array atm - if (destDataType == SVT_FLOAT) - { - // input already in uints, need bitcast - bcatcstr(glsl, "as_type("); - addedBitcast = true; - } - else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) - { - bcatcstr(glsl, "int("); - addedBitcast = true; - } - - 
glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - - if (psSrcAddr) - { - bcatcstr(glsl, "["); - glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); - bcatcstr(glsl, "].value"); - } - bcatcstr(glsl, "[("); - glsl << TranslateOperand(psSrcByteOff, srcOffFlag); - if (psInst->eOpcode == OPCODE_LD_UAV_TYPED) - { - bcatcstr(glsl, ")"); - } - else - { - bcatcstr(glsl, " >> 2"); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? psSrc->aui32Swizzle[component] : component); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - bcatcstr(glsl, "]"); - - if (addedBitcast) - bcatcstr(glsl, ")"); - } - AddAssignPrologue(numParenthesis); -} + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; -void ToMetal::TranslateAtomicMemOp(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; - const char* func = ""; - Operand* dest = 0; - Operand* previousValue = 0; - Operand* destAddr = 0; - Operand* src = 0; - Operand* compare = 0; - int texDim = 0; - bool isUint = true; - bool shouldAddFailMemoryOrder = false; - bool shouldExtractCompare = false; - - switch (psInst->eOpcode) - { - case OPCODE_IMM_ATOMIC_IADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); -#endif - func = "atomic_fetch_add_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IADD\n"); -#endif - func = "atomic_fetch_add_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_AND: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); -#endif - func = "atomic_fetch_and_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_AND: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_AND\n"); -#endif - func = "atomic_fetch_and_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_OR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); -#endif - func = "atomic_fetch_or_explicit"; - previousValue = &psInst->asOperands[0]; - dest = 
&psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_OR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_OR\n"); -#endif - func = "atomic_fetch_or_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_XOR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); -#endif - func = "atomic_fetch_xor_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_XOR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_XOR\n"); -#endif - func = "atomic_fetch_xor_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - - case OPCODE_IMM_ATOMIC_EXCH: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); -#endif - func = "atomic_exchange_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); -#endif - func = "atomic_compare_exchange_weak_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - compare = &psInst->asOperands[3]; - src = &psInst->asOperands[4]; - shouldAddFailMemoryOrder = true; - shouldExtractCompare = true; - break; - } - case OPCODE_ATOMIC_CMP_STORE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); -#endif - func = "atomic_compare_exchange_weak_explicit"; - previousValue = 0; - dest = &psInst->asOperands[0]; - destAddr = 
&psInst->asOperands[1]; - compare = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - shouldAddFailMemoryOrder = true; - shouldExtractCompare = true; - break; - } - case OPCODE_IMM_ATOMIC_UMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); -#endif - func = "atomic_fetch_min_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMIN\n"); -#endif - func = "atomic_fetch_min_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; -} - case OPCODE_IMM_ATOMIC_IMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); -#endif - func = "atomic_fetch_min_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMIN\n"); -#endif - func = "atomic_fetch_min_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_UMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); -#endif - func = "atomic_fetch_max_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMAX\n"); -#endif - func = "atomic_fetch_max_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case 
OPCODE_IMM_ATOMIC_IMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); -#endif - func = "atomic_fetch_max_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMAX\n"); -#endif - func = "atomic_fetch_max_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - default: - ASSERT(0); - break; - } - - psContext->AddIndentation(); - - const ResourceBinding* psBinding = 0; - if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - { - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); - - if (psBinding->eType == RTYPE_UAV_RWTYPED) - { - isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); - - // Find out if it's texture and of what dimension - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - texDim = 1; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - texDim = 2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - texDim = 3; - break; - case REFLECT_RESOURCE_DIMENSION_BUFFER: // Hack typed buffer as raw buf - break; - default: - ASSERT(0); - break; - } - } - } - - if (texDim > 0) - { - psContext->m_Reflection.OnDiagnostics("Texture atomics are not supported in Metal", 0, true); - return; - } - - if (isUint) - ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; - else - ui32DataTypeFlag = TO_FLAG_INTEGER | 
TO_AUTO_BITCAST_TO_INT; - - if (shouldExtractCompare) - { - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "uint compare_value = "); - glsl << TranslateOperand(compare, ui32DataTypeFlag); - bcatcstr(glsl, ";\n"); - psContext->AddIndentation(); - } - else if (previousValue) - AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis); - - bcatcstr(glsl, func); - bcatcstr(glsl, "("); - - uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); - if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) - destAddrFlag = TO_FLAG_INTEGER; - - if(dest->eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW) - bcatcstr(glsl, "reinterpret_cast(&"); - else - bcatcstr(glsl, "reinterpret_cast(&"); - glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - bcatcstr(glsl, "["); - glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); - - if (!psBinding || psBinding->eType != RTYPE_UAV_RWTYPED) - { - // Structured buf if we have both x & y swizzles. 
Raw buf has only x -> no .value[] - if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) - { - bcatcstr(glsl, "]"); - bcatcstr(glsl, ".value["); - glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); - } - - bcatcstr(glsl, " >> 2");//bytes to floats - if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - bcatcstr(glsl, "]), "); - - if (compare) - { - if (shouldExtractCompare) - { - bcatcstr(glsl, "&compare_value, "); - } - else - { - glsl << TranslateOperand(compare, ui32DataTypeFlag); - bcatcstr(glsl, ", "); - } - } - - glsl << TranslateOperand(src, ui32DataTypeFlag); - bcatcstr(glsl, ", memory_order::memory_order_relaxed"); - if (shouldAddFailMemoryOrder) - bcatcstr(glsl, ", memory_order::memory_order_relaxed"); - bcatcstr(glsl, ")"); - if (previousValue) - { - AddAssignPrologue(numParenthesis); - } - else - bcatcstr(glsl, ";\n"); - - if (shouldExtractCompare) - { - if (previousValue) - { - psContext->AddIndentation(); - AddAssignToDest(previousValue, SVT_UINT, 1, &numParenthesis); - bcatcstr(glsl, "compare_value"); - AddAssignPrologue(numParenthesis); - } - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } -} + psContext->AddIndentation(); -void ToMetal::TranslateConditional( - Instruction* psInst, - bstring glsl) -{ - const char* statement = ""; - if (psInst->eOpcode == OPCODE_BREAKC) - { - statement = "break"; - } - else if (psInst->eOpcode == OPCODE_CONTINUEC) - { - statement = "continue"; - } - else if (psInst->eOpcode == OPCODE_RETC) // FIXME! 
Need to spew out shader epilogue - { - if (psContext->psShader->eShaderType == COMPUTE_SHADER) - statement = "return"; - else - statement = "return output"; - } - - - int isBool = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; - - if (isBool) - { - bcatcstr(glsl, "if("); - if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) - bcatcstr(glsl, "!"); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, "){%s;}\n", statement); - } - else - { - bcatcstr(glsl, "){\n"); - } - } - else - { - if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - { - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); - - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, ")==uint(0u)){%s;}\n", statement); - } - else - { - bcatcstr(glsl, ")==uint(0u)){\n"); - } - } - else - { - ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); - - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, ")!=uint(0u)){%s;}\n", statement); - } - else - { - bcatcstr(glsl, ")!=uint(0u)){\n"); - } - } - } + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); } -void ToMetal::TranslateInstruction(Instruction* psInst) +void ToMetal::TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) { - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - -#ifdef _DEBUG - // Uncomment to print instruction IDs - //psContext->AddIndentation(); - //bformata(glsl, "//Instruction %d\n", psInst->id); -#if 0 - if(psInst->id == 73) - { - ASSERT(1); //Set breakpoint here to debug an instruction from its ID. 
- } -#endif -#endif - - switch (psInst->eOpcode) - { - case OPCODE_FTOI: - case OPCODE_FTOU: - { - uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_FTOU) - bcatcstr(glsl, "//FTOU\n"); - else - bcatcstr(glsl, "//FTOI\n"); -#endif - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_SINT_16: - castType = SVT_INT16; - ASSERT(psInst->eOpcode == OPCODE_FTOI); - break; - case OPERAND_MIN_PRECISION_UINT_16: - castType = SVT_UINT16; - ASSERT(psInst->eOpcode == OPCODE_FTOU); - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); - bcatcstr(glsl, "("); // 1 - glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_MOV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MOV\n"); -#endif - psContext->AddIndentation(); - AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1]); - break; - } - case OPCODE_ITOF://signed to float - case OPCODE_UTOF://unsigned to float - { - SHADER_VARIABLE_TYPE castType = SVT_FLOAT; - uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_ITOF) - { - bcatcstr(glsl, "//ITOF\n"); - } - else - { - bcatcstr(glsl, "//UTOF\n"); - } -#endif - - switch 
(psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - castType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - castType = SVT_FLOAT16; - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); - bcatcstr(glsl, "("); // 1 - glsl << TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_MAD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAD\n"); -#endif - CallHelper3("fma", psInst, 0, 1, 2, 3, 1); - break; - } - case OPCODE_IMAD: - { - uint32_t ui32Flags = TO_FLAG_INTEGER; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAD\n"); -#endif - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - ui32Flags = TO_FLAG_UNSIGNED_INTEGER; - } - - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); - break; - } - case OPCODE_DFMA: - { - uint32_t ui32Flags = TO_FLAG_DOUBLE; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DFMA\n"); -#endif - CallHelper3("fma", psInst, 0, 1, 2, 3, 1, ui32Flags); - break; - } - case OPCODE_DADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DADD\n"); -#endif - CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); - break; - } - case OPCODE_IADD: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IADD\n"); -#endif - //Is this a signed or unsigned add? 
- if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - CallBinaryOp("+", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_ADD: - { - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ADD\n"); -#endif - CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_OR: - { - /*Todo: vector version */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//OR\n"); -#endif - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " || "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else - CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_AND: - { - SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); - SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//AND\n"); -#endif - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " && "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - 
AddAssignPrologue(needsParenthesis); - } - else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) - { - int boolOp = eA == SVT_BOOL ? 1 : 2; - int otherOp = eA == SVT_BOOL ? 2 : 1; - int needsParenthesis = 0; - uint32_t i; - psContext->AddIndentation(); - - if (dstSwizCount == 1) - { - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); - glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " ? "); - glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, " : "); - - bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) - { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - - } - } - bcatcstr(glsl, ")"); - } - else if (eDataType == SVT_FLOAT) - { - // We can use select() - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); - bcatcstr(glsl, "select("); - bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) - { - if (i > 0) - bcatcstr(glsl, ", "); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - - } - } - bcatcstr(glsl, "), "); - glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, ", "); - bcatcstr(glsl, GetConstructorForTypeMetal(SVT_BOOL, dstSwizCount)); - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ")"); - bcatcstr(glsl, ")"); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); - bcatcstr(glsl, "("); - 
bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, dstSwizCount)); - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ") * 0xffffffffu) & "); - glsl << TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); - } - - AddAssignPrologue(needsParenthesis); - } - else - { - CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); - } - - - break; - } - case OPCODE_GE: - { - /* - dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); - Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. - */ -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GE\n"); -#endif - AddComparison(psInst, CMP_GE, TO_FLAG_NONE); - break; - } - case OPCODE_MUL: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MUL\n"); -#endif - CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_IMUL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMUL\n"); -#endif - if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); - - CallBinaryOp("*", psInst, 1, 2, 3, eType); - break; - } - case OPCODE_UDIV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UDIV\n"); -#endif - //destQuotient, destRemainder, src0, src1 - - // There are cases where destQuotient is the same variable as src0 or src1. If that happens, - // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
- if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) - && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) - { - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - } - else - { - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - } - break; - } - case OPCODE_DIV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DIV\n"); -#endif - CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_SINCOS: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SINCOS\n"); -#endif - // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value - if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && - psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) - { - // sin() result overwrites source, do cos() first. - // The case where both write the src shouldn't really happen anyway. 
- if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1( - "sin", psInst, 0, 2, 1); - } - } - else - { - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1("sin", psInst, 0, 2, 1); - } - - if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - } - break; - } - - case OPCODE_DP2: - { - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2\n"); -#endif - psContext->AddIndentation(); - SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); - uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; - if (CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; - - if (dstType != SVT_FLOAT16) - dstType = SVT_FLOAT; - - AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); - bcatcstr(glsl, "dot("); - glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 3 /* .xy */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP3: - { - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP3\n"); -#endif - psContext->AddIndentation(); - SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); - uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; - if (CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; - - if (dstType != SVT_FLOAT16) - dstType = SVT_FLOAT; - - AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); - bcatcstr(glsl, "dot("); - glsl << 
TranslateOperand(&psInst->asOperands[1], typeFlags, 7 /* .xyz */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 7 /* .xyz */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP4: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP4\n"); -#endif - CallHelper2("dot", psInst, 0, 1, 2, 0); - break; - } - case OPCODE_INE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INE\n"); -#endif - AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); - break; - } - case OPCODE_NE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//NE\n"); -#endif - AddComparison(psInst, CMP_NE, TO_FLAG_NONE); - break; - } - case OPCODE_IGE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IGE\n"); -#endif - AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); - break; - } - case OPCODE_ILT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ILT\n"); -#endif - AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); - break; - } - case OPCODE_LT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LT\n"); -#endif - AddComparison(psInst, CMP_LT, TO_FLAG_NONE); - break; - } - case OPCODE_IEQ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IEQ\n"); -#endif - AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); - break; - } - case OPCODE_ULT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ULT\n"); -#endif - AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_UGE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UGE\n"); -#endif - AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_MOVC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MOVC\n"); -#endif - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); - break; - } - 
case OPCODE_SWAPC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWAPC\n"); -#endif - // TODO needs temps!! - ASSERT(0); - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); - AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); - break; - } - - case OPCODE_LOG: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOG\n"); -#endif - CallHelper1("log2", psInst, 0, 1, 1); - break; - } - case OPCODE_RSQ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RSQ\n"); -#endif - CallHelper1("rsqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_EXP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EXP\n"); -#endif - CallHelper1("exp2", psInst, 0, 1, 1); - break; - } - case OPCODE_SQRT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SQRT\n"); -#endif - CallHelper1("sqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_PI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_PI\n"); -#endif - CallHelper1("ceil", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NI\n"); -#endif - CallHelper1("floor", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_Z: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_Z\n"); -#endif - CallHelper1("trunc", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NE\n"); -#endif - CallHelper1("rint", psInst, 0, 1, 1); - break; - } - case OPCODE_FRC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FRC\n"); -#endif - CallHelper1("fract", psInst, 0, 1, 1); - break; - } - case OPCODE_IMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAX\n"); -#endif - CallHelper2Int("max", 
psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMAX\n"); -#endif - CallHelper2UInt("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MAX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MAX\n"); -#endif - CallHelper2("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_IMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMIN\n"); -#endif - CallHelper2Int("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//UMIN\n"); -#endif - CallHelper2UInt("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MIN: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//MIN\n"); -#endif - CallHelper2("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_GATHER4: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); - break; - } - case OPCODE_GATHER4_PO_C: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO_C\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_GATHER4_PO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); - break; - } - case OPCODE_GATHER4_C: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_C\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE\n"); -#endif - TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); - break; - } - case OPCODE_SAMPLE_L: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_L\n"); -#endif 
- TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); - break; - } - case OPCODE_SAMPLE_C: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C\n"); -#endif - - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE_C_LZ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C_LZ\n"); -#endif - - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); - break; - } - case OPCODE_SAMPLE_D: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_D\n"); -#endif - - TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); - break; - } - case OPCODE_SAMPLE_B: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_B\n"); -#endif - - TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); - break; - } - case OPCODE_RET: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RET\n"); -#endif - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); -#endif - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); -#endif - } - psContext->AddIndentation(); - if(psContext->psShader->eShaderType == COMPUTE_SHADER) - bcatcstr(glsl, "return;\n"); - else - bcatcstr(glsl, "return output;\n"); - - break; - } - case OPCODE_INTERFACE_CALL: - { - ASSERT(0); - } - case OPCODE_LABEL: - { - ASSERT(0); // Never seen this - } - case OPCODE_COUNTBITS: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//COUNTBITS\n"); -#endif - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = popCount("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - 
break; - } - case OPCODE_FIRSTBIT_HI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_HI\n"); -#endif - DeclareExtraFunction("firstBit_hi", "template UVecType firstBit_hi(const UVecType input) { UVecType res = clz(input); return res; };"); - // TODO implement the 0-case (must return 0xffffffff) - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = firstBit_hi("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_LO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_LO\n"); -#endif - // TODO implement the 0-case (must return 0xffffffff) - DeclareExtraFunction("firstBit_lo", "template UVecType firstBit_lo(const UVecType input) { UVecType res = ctz(input); return res; };"); - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = firstBit_lo("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_SHI: //signed high - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_SHI\n"); -#endif - // TODO Not at all correct for negative values yet. 
- DeclareExtraFunction("firstBit_shi", "template IVecType firstBit_shi(const IVecType input) { IVecType res = clz(input); return res; };"); - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = firstBit_shi("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_BFREV: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFREV\n"); -#endif - DeclareExtraFunction("bitReverse", "template UVecType bitReverse(const UVecType input)\n\ - { UVecType x = input;\n\ - x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));\n\ - x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));\n\ - x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));\n\ - x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));\n\ - return((x >> 16) | (x << 16));\n\ - }; "); - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = bitReverse("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_BFI: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BFI\n"); -#endif - DeclareExtraFunction("BFI", "\ - template UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ - {\n\ - UVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\ - return ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ - }; "); - psContext->AddIndentation(); - - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); - bcatcstr(glsl, "bitFieldInsert("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << 
TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[4], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ")"); - - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_CUT: - case OPCODE_EMITTHENCUT_STREAM: - case OPCODE_EMIT: - case OPCODE_EMITTHENCUT: - case OPCODE_CUT_STREAM: - case OPCODE_EMIT_STREAM: - { - ASSERT(0); // Not on metal - } - case OPCODE_REP: - case OPCODE_ENDREP: - { - ASSERT(0); // Shouldn't see these anymore - } - case OPCODE_LOOP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOOP\n"); -#endif - psContext->AddIndentation(); - - bcatcstr(glsl, "while(true){\n"); - ++psContext->indent; - break; - } - case OPCODE_ENDLOOP: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDLOOP\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_BREAK: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAK\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "break;\n"); - break; - } - case OPCODE_BREAKC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAKC\n"); -#endif - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_CONTINUEC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//CONTINUEC\n"); -#endif - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_IF: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IF\n"); -#endif - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - ++psContext->indent; - break; - } - case OPCODE_RETC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RETC\n"); -#endif - 
psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_ELSE: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ELSE\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "} else {\n"); - psContext->indent++; - break; - } - case OPCODE_ENDSWITCH: - case OPCODE_ENDIF: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDIF\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_CONTINUE: - { - psContext->AddIndentation(); - bcatcstr(glsl, "continue;\n"); - break; - } - case OPCODE_DEFAULT: - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "default:\n"); - ++psContext->indent; - break; - } - case OPCODE_NOP: - { - break; - } - case OPCODE_SYNC: - { - const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SYNC\n"); -#endif - const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0; - const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0; - - const char* barrierFlags = "mem_flags::mem_none"; - if(sync_threadgroup && sync_device) barrierFlags = "mem_flags::mem_threadgroup | mem_flags::mem_device"; - else if(sync_threadgroup) barrierFlags = "mem_flags::mem_threadgroup"; - else if(sync_device) barrierFlags = "mem_flags::mem_device"; - - if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) - { - psContext->AddIndentation(); - bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); - } - else - { - psContext->AddIndentation(); bformata(glsl, "#if __HAVE_SIMDGROUP_BARRIER__\n"); - psContext->AddIndentation(); bformata(glsl, "simdgroup_barrier(%s);\n", barrierFlags); - psContext->AddIndentation(); bformata(glsl, "#else\n"); - psContext->AddIndentation(); bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); - 
psContext->AddIndentation(); bformata(glsl, "#endif\n"); - } - - break; - } - case OPCODE_SWITCH: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SWITCH\n"); -#endif - psContext->AddIndentation(); - bcatcstr(glsl, "switch(int("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")){\n"); - - psContext->indent += 2; - break; - } - case OPCODE_CASE: - { - --psContext->indent; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//case\n"); -#endif - psContext->AddIndentation(); - - bcatcstr(glsl, "case "); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ":\n"); - - ++psContext->indent; - break; - } - case OPCODE_EQ: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EQ\n"); -#endif - AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); - break; - } - case OPCODE_USHR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//USHR\n"); -#endif - CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_ISHL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHL\n"); -#endif - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp("<<", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_ISHR: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHR\n"); -#endif - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp(">>", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_LD: - case OPCODE_LD_MS: - { - const ResourceBinding* psBinding = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_LD) - bcatcstr(glsl, "//LD\n"); - else - bcatcstr(glsl, "//LD_MS\n"); -#endif - - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, 
psInst->asOperands[2].ui32RegisterNumber, &psBinding); - - if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf - { - psInst->eOpcode = OPCODE_LD_UAV_TYPED; - psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - psInst->asOperands[1].iNumComponents = 1; - TranslateShaderStorageLoad(psInst); - break; - } - - if (psInst->bAddressOffset) - { - TranslateTexelFetchOffset(psInst, psBinding, glsl); - } - else - { - TranslateTexelFetch(psInst, psBinding, glsl); - } - break; - } - case OPCODE_DISCARD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DISCARD\n"); -#endif - psContext->AddIndentation(); - if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - { - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")==0){discard_fragment();}\n"); - } - else - { - ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")!=0){discard_fragment();}\n"); - } - break; - } - case OPCODE_LOD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LOD\n"); -#endif - //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, &numParenthesis); - - //If the core language does not have query-lod feature, - //then the extension is used. The name of the function - //changed between extension and core. 
- if (HaveQueryLod(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "textureQueryLod("); - } - else - { - bcatcstr(glsl, "textureQueryLOD("); - } - - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ","); - TranslateTexCoord( - psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], - &psInst->asOperands[1]); - bcatcstr(glsl, ")"); - - //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. - - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. - psInst->asOperands[2].iWriteMaskEnabled = 1; - glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_EVAL_CENTROID: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_CENTROID\n"); -#endif - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtCentroid("); - //interpolateAtCentroid accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SAMPLE_INDEX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); -#endif - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtSample("); - //interpolateAtSample accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. 
- glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SNAPPED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SNAPPED\n"); -#endif - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtOffset("); - //interpolateAtOffset accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ".xy);\n"); - break; - } - case OPCODE_LD_STRUCTURED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_STRUCTURED\n"); -#endif - TranslateShaderStorageLoad(psInst); - break; - } - case OPCODE_LD_UAV_TYPED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); -#endif - Operand* psDest = &psInst->asOperands[0]; - Operand* psSrc = &psInst->asOperands[2]; - Operand* psSrcAddr = &psInst->asOperands[1]; - - const ResourceBinding* psRes = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); - SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + int numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ".read("); - if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + switch (psBinding->eDimension) + { + case 
REFLECT_RESOURCE_DIMENSION_BUFFER: { - psSrc->aeDataType[0] = srcDataType; - psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) - psSrcAddr->iNumComponents = 1; - TranslateShaderStorageLoad(psInst); - break; + psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); + return; } - -#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n - - // unlike glsl, texture arrays will have index in separate argument - const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) - || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); - - uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; - switch (psRes->eDimension) + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: { - case RRD(TEXTURE3D): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); break; - case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): - case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); break; - case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << 
TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level break; - default: - ASSERT(0); break; } - int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); - glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ".read("); - glsl << TranslateOperand(psSrcAddr, flags, opMask); - if(isArray) + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: { - // NB cube array is handled incorrectly - it needs extra "face" arg - switch (psRes->eDimension) - { - case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; - case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; - case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break; - default: ASSERT(0); break; - } - + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index bcatcstr(glsl, ", "); - glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; } - bcatcstr(glsl, ")"); - glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); - AddAssignPrologue(numParenthesis); - -#undef RRD - - break; - } - case OPCODE_STORE_RAW: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_RAW\n"); -#endif - TranslateShaderStorageStore(psInst); - break; - } - case OPCODE_STORE_STRUCTURED: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_STRUCTURED\n"); -#endif - TranslateShaderStorageStore(psInst); - break; - } - - case OPCODE_STORE_UAV_TYPED: - { - const ResourceBinding* psRes; - int 
foundResource; - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_UAV_TYPED\n"); -#endif - foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, - psInst->asOperands[0].ui32RegisterNumber, - &psRes); - ASSERT(foundResource); - - if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: { - psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); - psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - psInst->asOperands[1].iNumComponents = 1; - TranslateShaderStorageStore(psInst); - break; + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Shouldn't happen. 
Cubemap reads are not supported in HLSL + ASSERT(0); + break; + } + } + bcatcstr(glsl, ")"); + + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, &numParenthesis); + + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ".read("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, " + %d", psInst->iUAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, " + %d, ", psInst->iUAddrOffset); + + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Y); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case 
REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bformata(glsl, "+ ivec3(%d, %d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Shouldn't happen. Cubemap reads are not supported in HLSL + ASSERT(0); + break; + } + } + bcatcstr(glsl, ")"); + + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + +//Makes sure the texture coordinate swizzle is appropriate for the texture type. +//i.e. vecX for X-dimension texture. +//Currently supports floating point coord only, so not used for texelFetch. 
+void ToMetal::TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand) +{ + uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + bool isArray = false; + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + //Vec1 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + // x for coord, y for array element + opMask = OPERAND_4_COMPONENT_MASK_X; + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", round("); + + opMask = OPERAND_4_COMPONENT_MASK_Y; + flags = TO_AUTO_BITCAST_TO_FLOAT; + isArray = true; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + //Vec2 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE3D: + { + //Vec3 texcoord. Mask out the other components. 
+ opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + // xy for coord, z for array element + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", round("); + + opMask = OPERAND_4_COMPONENT_MASK_Z; + flags = TO_AUTO_BITCAST_TO_FLOAT; + isArray = true; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + // xyz for coord, w for array element + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", round("); + + opMask = OPERAND_4_COMPONENT_MASK_W; + flags = TO_AUTO_BITCAST_TO_FLOAT; + isArray = true; + break; + } + default: + { + ASSERT(0); + break; + } + } + + //FIXME detect when integer coords are needed. + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + if (isArray) + bcatcstr(glsl, ")"); +} + +void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); + + const char *metalGetters[] = { ".get_width(", ".get_height(", ".get_depth(", ".get_num_mip_levels()" }; + int dim = GetNumTextureDimensions(psInst->eResDim); + if (dim < (index + 1) && index != 3) + { + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "uint(0)" : "0.0"); + } + else + { + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT) + { + bcatcstr(glsl, "float("); + numParenthesis++; + } + else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) + { + bcatcstr(glsl, "1.0f / float("); + numParenthesis++; + } + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NAME_ONLY); + if ((index == 1 && psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY) || + (index == 2 && (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY || + psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY))) + { + bcatcstr(glsl, ".get_array_size()"); + } + else + { + bcatcstr(glsl, metalGetters[index]); + + if (index < 3) + { + if (psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMS && + psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMSARRAY) + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); //mip level + + bcatcstr(glsl, ")"); + } + } + } + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; + + Operand* psDest = &psInst->asOperands[0]; + Operand* psDestAddr = &psInst->asOperands[1]; + Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; + Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; + Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; + Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? 
&psInst->asOperands[4 + hasParamOffset] : 0; + Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; + Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; + + const char *funcName = ""; + const char* gradSwizzle = ""; + const char *gradientName = ""; + + uint32_t ui32NumOffsets = 0; + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; + + if (ui32Flags & TEXSMP_FLAG_GATHER) + { + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + funcName = "gather_compare"; + else + funcName = "gather"; + } + else + { + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + funcName = "sample_compare"; + else + funcName = "sample"; + } + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + gradSwizzle = ".xy"; + gradientName = "gradient2d"; + ui32NumOffsets = 2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradientcube"; + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradient3d"; + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + gradientName = "gradient2d"; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradientcube"; + break; + } + default: + { + ASSERT(0); + break; + } + } + + + SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); + psContext->AddIndentation(); + AddAssignToDest(psDest, 
dataType, psSrcTex->GetNumSwizzleElements(), &numParenthesis); + + std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); + + // TextureName.FuncName( + glsl << texName; + bformata(glsl, ".%s(", funcName); + + bool isDepthSampler = false; + for (unsigned j = 0, m = m_Textures.size(); j < m; ++j) + { + if (m_Textures[j].name == texName) + { + isDepthSampler = m_Textures[j].isDepthSampler; + break; + } + } + + // Sampler name + //TODO: Is it ok to use fixed shadow sampler in all cases of depth compare or would we need more + // accurate way of detecting shadow cases (atm all depth compares are interpreted as shadow usage) + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + bcatcstr(glsl, "_mtl_xl_shadow_sampler"); + } + else + { + std::string sampName = TranslateOperand(psSrcSamp, TO_FLAG_NAME_ONLY); + + // insert the "sampler" prefix if the sampler name is equal to the texture name (default sampler) + if (texName == sampName) + sampName.insert(0, "sampler"); + glsl << sampName; + } + + bcatcstr(glsl, ", "); + + // Texture coordinates + TranslateTexCoord(eResDim, psDestAddr); + + // Depth compare reference value + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + bcatcstr(glsl, ", saturate("); // TODO: why the saturate here? 
+ glsl << TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + } + + // lod_options (LOD/grad/bias) based on the flags + if (ui32Flags & TEXSMP_FLAG_LOD) + { + bcatcstr(glsl, ", level("); + glsl << TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } + bcatcstr(glsl, ")"); + } + else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) + { + bcatcstr(glsl, ", level(0.0)"); + } + else if (ui32Flags & TEXSMP_FLAG_GRAD) + { + glsl << std::string(", ") << std::string(gradientName) << std::string("(float4("); + glsl << TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ", float4("); + glsl << TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ")"); + } + else if (ui32Flags & (TEXSMP_FLAG_BIAS)) + { + glsl << std::string(", bias(") << TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT) << std::string(")"); + } + + bool hadOffset = false; + + // Add offset param + if (psInst->bAddressOffset) + { + hadOffset = true; + if (ui32NumOffsets == 1) + { + bformata(glsl, ", %d", + psInst->iUAddrOffset); + } + else if (ui32NumOffsets == 2) + { + bformata(glsl, ", int2(%d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + } + else if (ui32NumOffsets == 3) + { + bformata(glsl, ", int3(%d, %d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + } + } + // HLSL gather has a variant with separate offset operand + else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) + { + hadOffset = true; + uint32_t mask = OPERAND_4_COMPONENT_MASK_X; + if (ui32NumOffsets > 1) + mask |= OPERAND_4_COMPONENT_MASK_Y; + if (ui32NumOffsets > 2) + mask |= OPERAND_4_COMPONENT_MASK_Z; + + bcatcstr(glsl, ","); + glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); + } + + // Add texture gather component selection if needed + if ((ui32Flags & 
TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) + { + ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); + if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) + { + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) + { + // Need to add offset param to match func overload + if (!hadOffset) + { + if (ui32NumOffsets == 1) + bcatcstr(glsl, ", 0"); + else + bformata(glsl, ", int%d(0)", ui32NumOffsets); + } + + bcatcstr(glsl, ", component::"); + glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false); + } + else + { + psContext->m_Reflection.OnDiagnostics("Metal supports gather compare only for the first component.", 0, true); + } + } + } + + bcatcstr(glsl, ")"); + + if (!((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || isDepthSampler) || (ui32Flags & TEXSMP_FLAG_GATHER)) + { + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psSrcTex->iWriteMaskEnabled = 1; + glsl << TranslateOperandSwizzle(psSrcTex, psDest->GetAccessMask(), 0); + } + AddAssignPrologue(numParenthesis); +} + +// Handle cases where vector components are accessed with dynamic index ([] notation). +// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting +// the offset back to vector component index in runtime => calculating stuff back and forth. +// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
+// Appends a dynamic component subscript of the form "[<byteAddr> >> 0x2u]" (or
+// "[(<byteAddr> - <offset>u) >> 0x2u]" when offset is non-zero) to the current output string.
+//   psVarType  - type of the indexed variable; asserted to be a vector.
+//   psByteAddr - operand holding the byte address of the requested element.
+//   offset     - byte offset subtracted from the address before it is converted to an index.
+//   mask       - component mask forwarded to TranslateOperand for psByteAddr.
+void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask)
+{
+    bstring glsl = *psContext->currentGLSLString;
+    ASSERT(psVarType->Class == SVC_VECTOR);
+
+    bcatcstr(glsl, "["); // Access vector component with [] notation
+    if (offset > 0)
+        bcatcstr(glsl, "(");
+
+    // The var containing byte address to the requested element
+    glsl << TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask);
+
+    // If the vector is part of a struct, there is an extra offset in our byte address.
+    // offset is unsigned, so it is printed with %u; the trailing 'u' is the literal suffix.
+    if (offset > 0)
+        bformata(glsl, " - %uu)", offset); // Subtract that first
+
+    bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four
+    bcatcstr(glsl, "]");
+}
+
+// Emits one assignment statement per enabled destination component for
+// STORE_STRUCTURED / STORE_RAW / STORE_UAV_TYPED.
+// The destination is addressed as an array of 32-bit elements: for the raw and structured
+// forms the byte offset is shifted right by 2 and the component number is added; for the
+// typed form the offset operand is used as the index directly.
+void ToMetal::TranslateShaderStorageStore(Instruction* psInst)
+{
+    bstring glsl = *psContext->currentGLSLString;
+    int srcComponent = 0;
+
+    Operand* psDest = 0;
+    Operand* psDestAddr = 0; // only set for the structured form
+    Operand* psDestByteOff = 0;
+    Operand* psSrc = 0;
+
+    switch (psInst->eOpcode)
+    {
+        case OPCODE_STORE_STRUCTURED:
+            psDest = &psInst->asOperands[0];
+            psDestAddr = &psInst->asOperands[1];
+            psDestByteOff = &psInst->asOperands[2];
+            psSrc = &psInst->asOperands[3];
+            break;
+        case OPCODE_STORE_RAW:
+        case OPCODE_STORE_UAV_TYPED: // Hack typed buffer as raw buf
+            psDest = &psInst->asOperands[0];
+            psDestByteOff = &psInst->asOperands[1];
+            psSrc = &psInst->asOperands[2];
+            break;
+        default:
+            ASSERT(0);
+            return; // No operands were resolved; continuing would dereference null pointers.
+    }
+
+    const bool isTypedStore = (psInst->eOpcode == OPCODE_STORE_UAV_TYPED);
+
+    // Translate the offset as signed only when the operand itself is a signed integer type.
+    uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER;
+    SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext);
+    if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12)
+        dstOffFlag = TO_FLAG_INTEGER;
+
+    // Literal suffix that keeps emitted constants the same signedness as the offset.
+    const char* literalSuffix = (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) ? "u" : "";
+
+    ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE);
+
+    for (int component = 0; component < 4; component++)
+    {
+        if (!(psInst->asOperands[0].ui32CompMask & (1 << component)))
+            continue;
+
+        psContext->AddIndentation();
+        glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY);
+
+        if (psDestAddr)
+        {
+            bcatcstr(glsl, "[");
+            glsl << TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER);
+            bcatcstr(glsl, "].value");
+        }
+
+        bcatcstr(glsl, "[(");
+        glsl << TranslateOperand(psDestByteOff, dstOffFlag);
+        if (isTypedStore)
+        {
+            bcatcstr(glsl, ")");
+        }
+        else
+        {
+            bformata(glsl, " >> 2%s)", literalSuffix);
+            if (component != 0)
+                bformata(glsl, " + %d%s", component, literalSuffix);
+        }
+        bcatcstr(glsl, "]");
+
+        //Dest type is currently always a uint array.
+        bcatcstr(glsl, " = ");
+        if (psSrc->GetNumSwizzleElements() > 1)
+            glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++)); // next source component
+        else
+            glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // scalar source
+
+        bcatcstr(glsl, ";\n");
+    }
+}
+
+// Emits a single assignment for LD_STRUCTURED / LD_RAW / LD_UAV_TYPED.
+// Every component enabled in the destination mask is read from the source, which is treated
+// as an array of uints, and converted to the destination type: as_type<float>() for float
+// destinations, int() for signed integer destinations. Multi-component destinations are
+// wrapped in the matching vector constructor.
+void ToMetal::TranslateShaderStorageLoad(Instruction* psInst)
+{
+    bstring glsl = *psContext->currentGLSLString;
+    Operand* psDest = 0;
+    Operand* psSrcAddr = 0; // only set for the structured form
+    Operand* psSrcByteOff = 0;
+    Operand* psSrc = 0;
+
+    switch (psInst->eOpcode)
+    {
+        case OPCODE_LD_STRUCTURED:
+            psDest = &psInst->asOperands[0];
+            psSrcAddr = &psInst->asOperands[1];
+            psSrcByteOff = &psInst->asOperands[2];
+            psSrc = &psInst->asOperands[3];
+            break;
+        case OPCODE_LD_RAW:
+        case OPCODE_LD_UAV_TYPED: // Hack typed buffer as raw buf
+            psDest = &psInst->asOperands[0];
+            psSrcByteOff = &psInst->asOperands[1];
+            psSrc = &psInst->asOperands[2];
+            break;
+        default:
+            ASSERT(0);
+            return; // No operands were resolved; continuing would dereference null pointers.
+    }
+
+    const bool isTypedLoad = (psInst->eOpcode == OPCODE_LD_UAV_TYPED);
+    uint32_t destCount = psDest->GetNumSwizzleElements();
+    uint32_t destMask = psDest->GetAccessMask();
+
+    int numParenthesis = 0;
+    bool firstItemAdded = false;
+    SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext);
+
+    // Translate the offset as signed only when the operand itself is a signed integer type.
+    uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER;
+    SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext);
+    if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12)
+        srcOffFlag = TO_FLAG_INTEGER;
+
+    // Literal suffix that keeps emitted constants the same signedness as the offset.
+    const char* literalSuffix = (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) ? "u" : "";
+
+    psContext->AddIndentation();
+    AddAssignToDest(psDest, destDataType, destCount, &numParenthesis);
+    if (destCount > 1)
+    {
+        bformata(glsl, "%s(", GetConstructorForTypeMetal(destDataType, destCount));
+        numParenthesis++;
+    }
+
+    for (int component = 0; component < 4; component++)
+    {
+        if (!(destMask & (1 << component)))
+            continue;
+
+        if (firstItemAdded)
+            bcatcstr(glsl, ", ");
+        firstItemAdded = true;
+
+        // always uint array atm
+        bool addedBitcast = false;
+        if (destDataType == SVT_FLOAT)
+        {
+            // input already in uints, need bitcast
+            bcatcstr(glsl, "as_type<float>(");
+            addedBitcast = true;
+        }
+        else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12)
+        {
+            bcatcstr(glsl, "int(");
+            addedBitcast = true;
+        }
+
+        glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY);
+
+        if (psSrcAddr)
+        {
+            bcatcstr(glsl, "[");
+            glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER);
+            bcatcstr(glsl, "].value");
+        }
+
+        bcatcstr(glsl, "[(");
+        glsl << TranslateOperand(psSrcByteOff, srcOffFlag);
+        if (isTypedLoad)
+        {
+            bcatcstr(glsl, ")");
+        }
+        else
+        {
+            // Element to read: the swizzled source component when the source uses a swizzle,
+            // otherwise the destination component number itself.
+            const uint32_t srcElement = (psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
+                ? psSrc->aui32Swizzle[component]
+                : (uint32_t)component;
+            bformata(glsl, " >> 2%s) + %u%s", literalSuffix, srcElement, literalSuffix);
+        }
+        bcatcstr(glsl, "]");
+
+        if (addedBitcast)
+            bcatcstr(glsl, ")");
+    }
+    AddAssignPrologue(numParenthesis);
+}
+
+// Emits a Metal atomic_*_explicit() call for the (IMM_)ATOMIC_* opcodes.
+// Operand layout, in order: [previous value (IMM_ forms only)], destination resource,
+// destination address, [compare value (compare-exchange forms only)], source value.
+// Compare-exchange forms are wrapped in a "{ ... }" scope that declares a local
+// "compare_value", passes its address to the call, and afterwards copies it into the
+// previous-value operand when there is one.
+// Atomics on typed UAV textures are reported through OnDiagnostics and nothing else is emitted.
+void ToMetal::TranslateAtomicMemOp(Instruction* psInst)
+{
+    bstring glsl = *psContext->currentGLSLString;
+    int numParenthesis = 0;
+    uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER;
+    const char* func = "";
+    const char* opName = "";        // text of the debug comment emitted in _DEBUG builds
+    bool returnsPrevious = false;   // IMM_ forms: operand 0 receives the previous value
+    bool isCompareExchange = false; // an extra compare operand precedes the source operand
+    int texDim = 0;
+    bool isUint = true;
+
+    switch (psInst->eOpcode)
+    {
+        case OPCODE_IMM_ATOMIC_IADD:     opName = "IMM_ATOMIC_IADD";    func = "atomic_fetch_add_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_IADD:         opName = "ATOMIC_IADD";        func = "atomic_fetch_add_explicit"; break;
+        case OPCODE_IMM_ATOMIC_AND:      opName = "IMM_ATOMIC_AND";     func = "atomic_fetch_and_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_AND:          opName = "ATOMIC_AND";         func = "atomic_fetch_and_explicit"; break;
+        case OPCODE_IMM_ATOMIC_OR:       opName = "IMM_ATOMIC_OR";      func = "atomic_fetch_or_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_OR:           opName = "ATOMIC_OR";          func = "atomic_fetch_or_explicit"; break;
+        case OPCODE_IMM_ATOMIC_XOR:      opName = "IMM_ATOMIC_XOR";     func = "atomic_fetch_xor_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_XOR:          opName = "ATOMIC_XOR";         func = "atomic_fetch_xor_explicit"; break;
+        case OPCODE_IMM_ATOMIC_EXCH:     opName = "IMM_ATOMIC_EXCH";    func = "atomic_exchange_explicit"; returnsPrevious = true; break;
+        case OPCODE_IMM_ATOMIC_CMP_EXCH: opName = "IMM_ATOMIC_CMP_EXC"; func = "atomic_compare_exchange_weak_explicit"; returnsPrevious = true; isCompareExchange = true; break;
+        case OPCODE_ATOMIC_CMP_STORE:    opName = "ATOMIC_CMP_STORE";   func = "atomic_compare_exchange_weak_explicit"; isCompareExchange = true; break;
+        case OPCODE_IMM_ATOMIC_UMIN:     opName = "IMM_ATOMIC_UMIN";    func = "atomic_fetch_min_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_UMIN:         opName = "ATOMIC_UMIN";        func = "atomic_fetch_min_explicit"; break;
+        case OPCODE_IMM_ATOMIC_IMIN:     opName = "IMM_ATOMIC_IMIN";    func = "atomic_fetch_min_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_IMIN:         opName = "ATOMIC_IMIN";        func = "atomic_fetch_min_explicit"; break;
+        case OPCODE_IMM_ATOMIC_UMAX:     opName = "IMM_ATOMIC_UMAX";    func = "atomic_fetch_max_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_UMAX:         opName = "ATOMIC_UMAX";        func = "atomic_fetch_max_explicit"; break;
+        case OPCODE_IMM_ATOMIC_IMAX:     opName = "IMM_ATOMIC_IMAX";    func = "atomic_fetch_max_explicit"; returnsPrevious = true; break;
+        case OPCODE_ATOMIC_IMAX:         opName = "ATOMIC_IMAX";        func = "atomic_fetch_max_explicit"; break;
+        default:
+            ASSERT(0);
+            return; // Unknown opcode: no operands can be resolved, so nothing can be emitted.
+    }
+
+#ifdef _DEBUG
+    psContext->AddIndentation();
+    bformata(glsl, "//%s\n", opName);
+#else
+    (void)opName;
+#endif
+
+    // Resolve the operands in their fixed order; optional ones are present only for some forms.
+    uint32_t operandIndex = 0;
+    Operand* previousValue = returnsPrevious ? &psInst->asOperands[operandIndex++] : 0;
+    Operand* dest = &psInst->asOperands[operandIndex++];
+    Operand* destAddr = &psInst->asOperands[operandIndex++];
+    Operand* compare = isCompareExchange ? &psInst->asOperands[operandIndex++] : 0;
+    Operand* src = &psInst->asOperands[operandIndex];
+
+    psContext->AddIndentation();
+
+    const ResourceBinding* psBinding = 0;
+    if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
+    {
+        psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding);
+
+        // psBinding stays null if the lookup did not fill it in, so check before dereferencing.
+        if (psBinding && psBinding->eType == RTYPE_UAV_RWTYPED)
+        {
+            isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT);
+
+            // Find out if it's texture and of what dimension
+            switch (psBinding->eDimension)
+            {
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE1D:
+                    texDim = 1;
+                    break;
+                case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE:
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY:
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE2D:
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS:
+                    texDim = 2;
+                    break;
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE3D:
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY:
+                case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
+                case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
+                    texDim = 3;
+                    break;
+                case REFLECT_RESOURCE_DIMENSION_BUFFER: // Hack typed buffer as raw buf
+                    break;
+                default:
+                    ASSERT(0);
+                    break;
+            }
+        }
+    }
+
+    if (texDim > 0)
+    {
+        psContext->m_Reflection.OnDiagnostics("Texture atomics are not supported in Metal", 0, true);
+        return;
+    }
+
+    if (isUint)
+        ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT;
+    else
+        ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT;
+
+    if (isCompareExchange)
+    {
+        // Open a scope holding the expected value; its address is passed to the atomic call.
+        bcatcstr(glsl, "{\n");
+        ++psContext->indent;
+        psContext->AddIndentation();
+        bcatcstr(glsl, "uint compare_value = ");
+        glsl << TranslateOperand(compare, ui32DataTypeFlag);
+        bcatcstr(glsl, ";\n");
+        psContext->AddIndentation();
+    }
+    else if (previousValue)
+        AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, &numParenthesis);
+
+    bcatcstr(glsl, func);
+    bcatcstr(glsl, "(");
+
+    // Translate the address as signed only when the operand itself is a signed integer type.
+    uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER;
+    SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext);
+    if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12)
+        destAddrFlag = TO_FLAG_INTEGER;
+
+    // The atomic functions take a pointer to an atomic type in the address space of the
+    // destination: device memory for UAVs, threadgroup memory otherwise.
+    // NOTE(review): the pointee is atomic_uint even when isUint is false - confirm intended.
+    if (dest->eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW)
+        bcatcstr(glsl, "reinterpret_cast<device atomic_uint *>(&");
+    else
+        bcatcstr(glsl, "reinterpret_cast<threadgroup atomic_uint *>(&");
+    glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY);
+    bcatcstr(glsl, "[");
+    glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X);
+
+    if (!psBinding || psBinding->eType != RTYPE_UAV_RWTYPED)
+    {
+        // Structured buf if we have both x & y swizzles. Raw buf has only x -> no .value[]
+        if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2)
+        {
+            bcatcstr(glsl, "]");
+            bcatcstr(glsl, ".value[");
+            glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y);
+        }
+
+        bcatcstr(glsl, " >> 2"); //bytes to floats
+        if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER)
+            bcatcstr(glsl, "u");
+    }
+    bcatcstr(glsl, "]), ");
+
+    if (isCompareExchange)
+        bcatcstr(glsl, "&compare_value, ");
+
+    glsl << TranslateOperand(src, ui32DataTypeFlag);
+    bcatcstr(glsl, ", memory_order::memory_order_relaxed");
+    if (isCompareExchange)
+        bcatcstr(glsl, ", memory_order::memory_order_relaxed"); // second order argument of compare-exchange
+    bcatcstr(glsl, ")");
+    if (previousValue)
+    {
+        AddAssignPrologue(numParenthesis);
+    }
+    else
+        bcatcstr(glsl, ";\n");
+
+    if (isCompareExchange)
+    {
+        if (previousValue)
+        {
+            // Copy compare_value into the previous-value operand after the call.
+            psContext->AddIndentation();
+            AddAssignToDest(previousValue, SVT_UINT, 1, &numParenthesis);
+            bcatcstr(glsl, "compare_value");
+            AddAssignPrologue(numParenthesis);
+        }
+        --psContext->indent;
+        psContext->AddIndentation();
+        bcatcstr(glsl, "}\n");
+    }
+}
+
+// Appends the "if(...)" header for IF, or a complete one-line "if(...){<statement>;}" for
+// BREAKC / CONTINUEC / RETC, to the given string.
+// Bool operands are tested directly (negated unless the test is "non-zero"); all other
+// operands are translated as unsigned integers and compared against uint(0).
+void ToMetal::TranslateConditional(
+    Instruction* psInst,
+    bstring glsl)
+{
+    const char* statement = "";
+    if (psInst->eOpcode == OPCODE_BREAKC)
+    {
+        statement = "break";
+    }
+    else if (psInst->eOpcode == OPCODE_CONTINUEC)
+    {
+        statement = "continue";
+    }
+    else if (psInst->eOpcode == OPCODE_RETC) // FIXME! Need to spew out shader epilogue
+    {
+        if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0))
+            statement = "return";
+        else
+            statement = "return output";
+    }
+
+    const bool isBool = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL;
+
+    // Condition part: everything up to and including the ')' that closes "if(".
+    if (isBool)
+    {
+        bcatcstr(glsl, "if(");
+        if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO)
+            bcatcstr(glsl, "!");
+        glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL);
+        bcatcstr(glsl, ")");
+    }
+    else
+    {
+        const char* comparison = "!=";
+        if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO)
+            comparison = "==";
+        else
+            ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO);
+
+        bcatcstr(glsl, "if((");
+        glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER);
+        bformata(glsl, ")%suint(0))", comparison);
+    }
+
+    // Body part: IF only opens the block; the conditional jumps are complete statements.
+    if (psInst->eOpcode != OPCODE_IF)
+        bformata(glsl, "{%s;}\n", statement);
+    else
+        bcatcstr(glsl, "{\n");
+}
+
+void ToMetal::TranslateInstruction(Instruction* psInst)
+{
+    bstring glsl = *psContext->currentGLSLString;
+    int numParenthesis = 0;
+
+#ifdef _DEBUG
+    // Uncomment to print instruction IDs
+    //psContext->AddIndentation();
+    //bformata(glsl, "//Instruction %d\n", psInst->id);
+#if 0
+    if (psInst->id == 73)
+    {
+        ASSERT(1); //Set breakpoint here to debug an instruction from its ID.
+ } +#endif +#endif + + switch (psInst->eOpcode) + { + case OPCODE_FTOI: + case OPCODE_FTOU: + { + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); +#endif + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + castType = SVT_INT16; + ASSERT(psInst->eOpcode == OPCODE_FTOI); + break; + case OPERAND_MIN_PRECISION_UINT_16: + castType = SVT_UINT16; + ASSERT(psInst->eOpcode == OPCODE_FTOU); + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); + bcatcstr(glsl, "("); // 1 + glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_MOV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); +#endif + psContext->AddIndentation(); + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1]); + break; + } + case OPCODE_ITOF://signed to float + case OPCODE_UTOF://unsigned to float + { + SHADER_VARIABLE_TYPE castType = SVT_FLOAT; + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + { + bcatcstr(glsl, "//ITOF\n"); + } + else + { + bcatcstr(glsl, "//UTOF\n"); + } +#endif + + switch 
(psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + castType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + castType = SVT_FLOAT16; + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); + bcatcstr(glsl, "("); // 1 + glsl << TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_MAD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); +#endif + CallHelper3("fma", psInst, 0, 1, 2, 3, 1); + break; + } + case OPCODE_IMAD: + { + uint32_t ui32Flags = TO_FLAG_INTEGER; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + ui32Flags = TO_FLAG_UNSIGNED_INTEGER; + } + + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } + case OPCODE_DFMA: + { + uint32_t ui32Flags = TO_FLAG_DOUBLE; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DFMA\n"); +#endif + CallHelper3("fma", psInst, 0, 1, 2, 3, 1, ui32Flags); + break; + } + case OPCODE_DADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); + break; + } + case OPCODE_IADD: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); +#endif + //Is this a signed or unsigned add? 
+ if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + CallBinaryOp("+", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); +#endif + CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_OR: + { + /*Todo: vector version */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); +#endif + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " || "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_AND: + { + SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); + SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); +#endif + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), &needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " && "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + 
AddAssignPrologue(needsParenthesis); + } + else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) + { + int boolOp = eA == SVT_BOOL ? 1 : 2; + int otherOp = eA == SVT_BOOL ? 2 : 1; + int needsParenthesis = 0; + uint32_t i; + psContext->AddIndentation(); + + if (dstSwizCount == 1) + { + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " ? "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + + bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + } + } + bcatcstr(glsl, ")"); + } + else if (eDataType == SVT_FLOAT) + { + // We can use select() + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "select("); + bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + } + } + bcatcstr(glsl, "), "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, ", "); + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_BOOL, dstSwizCount)); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ")"); + bcatcstr(glsl, ")"); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, &needsParenthesis); + bcatcstr(glsl, "("); + 
bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, dstSwizCount)); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ") * 0xffffffffu) & "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); + } + + + break; + } + case OPCODE_GE: + { + /* + dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); + Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. + */ +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_NONE); + break; + } + case OPCODE_MUL: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); +#endif + CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_IMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); +#endif + if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp("*", psInst, 1, 2, 3, eType); + break; + } + case OPCODE_UDIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); +#endif + //destQuotient, destRemainder, src0, src1 + + // There are cases where destQuotient is the same variable as src0 or src1. If that happens, + // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
+ if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) + && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) + { + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + } + else + { + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + } + break; + } + case OPCODE_DIV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); +#endif + CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_SINCOS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); +#endif + // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value + if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && + psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) + { + // sin() result overwrites source, do cos() first. + // The case where both write the src shouldn't really happen anyway. 
+ if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1( + "sin", psInst, 0, 2, 1); + } + } + else + { + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1("sin", psInst, 0, 2, 1); + } + + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + } + break; + } + + case OPCODE_DP2: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); +#endif + psContext->AddIndentation(); + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; + if (CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; + + if (dstType != SVT_FLOAT16) + dstType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 3 /* .xy */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP3: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); +#endif + psContext->AddIndentation(); + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; + if (CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; + + if (dstType != SVT_FLOAT16) + dstType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], dstType, 1, &numParenthesis); + bcatcstr(glsl, "dot("); + glsl << 
TranslateOperand(&psInst->asOperands[1], typeFlags, 7 /* .xyz */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 7 /* .xyz */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); +#endif + CallHelper2("dot", psInst, 0, 1, 2, 0); + break; + } + case OPCODE_INE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); + break; + } + case OPCODE_NE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); +#endif + AddComparison(psInst, CMP_NE, TO_FLAG_NONE); + break; + } + case OPCODE_IGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); + break; + } + case OPCODE_ILT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); + break; + } + case OPCODE_LT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_NONE); + break; + } + case OPCODE_IEQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); + break; + } + case OPCODE_ULT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); +#endif + AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_UGE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); +#endif + AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_MOVC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); +#endif + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3]); + break; + } + 
case OPCODE_SWAPC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); +#endif + // TODO needs temps!! + ASSERT(0); + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3]); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4]); + break; + } + + case OPCODE_LOG: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); +#endif + CallHelper1("log2", psInst, 0, 1, 1); + break; + } + case OPCODE_RSQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); +#endif + CallHelper1("rsqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_EXP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); +#endif + CallHelper1("exp2", psInst, 0, 1, 1); + break; + } + case OPCODE_SQRT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); +#endif + CallHelper1("sqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_PI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); +#endif + CallHelper1("ceil", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); +#endif + CallHelper1("floor", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_Z: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); +#endif + CallHelper1("trunc", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); +#endif + CallHelper1("rint", psInst, 0, 1, 1); + break; + } + case OPCODE_FRC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); +#endif + CallHelper1("fract", psInst, 0, 1, 1); + break; + } + case OPCODE_IMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); +#endif + CallHelper2Int("max", 
psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); +#endif + CallHelper2UInt("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MAX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); +#endif + CallHelper2("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_IMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); +#endif + CallHelper2Int("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); +#endif + CallHelper2UInt("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MIN: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); +#endif + CallHelper2("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_GATHER4: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); + break; + } + case OPCODE_GATHER4_PO_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_GATHER4_PO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); + break; + } + case OPCODE_GATHER4_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); +#endif + TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); + break; + } + case OPCODE_SAMPLE_L: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); +#endif 
+ TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); + break; + } + case OPCODE_SAMPLE_C: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE_C_LZ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); + break; + } + case OPCODE_SAMPLE_D: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); + break; + } + case OPCODE_SAMPLE_B: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); +#endif + + TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); + break; + } + case OPCODE_RET: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); +#endif + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); +#endif + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); +#endif + } + psContext->AddIndentation(); + if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0)) + bcatcstr(glsl, "return;\n"); + else + bcatcstr(glsl, "return output;\n"); + + break; + } + case OPCODE_INTERFACE_CALL: + { + ASSERT(0); + } + case OPCODE_LABEL: + { + ASSERT(0); // Never seen this + } + case OPCODE_COUNTBITS: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + 
bcatcstr(glsl, " = popCount("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_HI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); +#endif + DeclareExtraFunction("firstBit_hi", "template UVecType firstBit_hi(const UVecType input) { UVecType res = clz(input); return res; };"); + // TODO implement the 0-case (must return 0xffffffff) + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_hi("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_LO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); +#endif + // TODO implement the 0-case (must return 0xffffffff) + DeclareExtraFunction("firstBit_lo", "template UVecType firstBit_lo(const UVecType input) { UVecType res = ctz(input); return res; };"); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_lo("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_SHI: //signed high + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); +#endif + // TODO Not at all correct for negative values yet. 
+ DeclareExtraFunction("firstBit_shi", "template IVecType firstBit_shi(const IVecType input) { IVecType res = clz(input); return res; };"); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_shi("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFREV: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); +#endif + DeclareExtraFunction("bitReverse", "template UVecType bitReverse(const UVecType input)\n\ +\t\t{ UVecType x = input;\n\ +\t\t\tx = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));\n\ +\t\t\tx = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));\n\ +\t\t\tx = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));\n\ +\t\t\tx = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));\n\ +\t\t\treturn((x >> 16) | (x << 16));\n\ +\t\t}; "); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitReverse("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFI: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); +#endif + DeclareExtraFunction("BFI", "\ +\t\ttemplate UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ +\t\t{\n\ +\t\t\tUVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\ +\t\t\treturn ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ +\t\t}; "); + psContext->AddIndentation(); + + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); + bcatcstr(glsl, "bitFieldInsert("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, 
destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[4], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ")"); + + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_CUT: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_CUT_STREAM: + case OPCODE_EMIT_STREAM: + { + ASSERT(0); // Not on metal + } + case OPCODE_REP: + case OPCODE_ENDREP: + { + ASSERT(0); // Shouldn't see these anymore + } + case OPCODE_LOOP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); +#endif + psContext->AddIndentation(); + + bcatcstr(glsl, "while(true){\n"); + ++psContext->indent; + break; + } + case OPCODE_ENDLOOP: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_BREAK: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "break;\n"); + break; + } + case OPCODE_BREAKC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_CONTINUEC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_IF: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + ++psContext->indent; + break; + } + case OPCODE_RETC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + 
bcatcstr(glsl, "//RETC\n"); +#endif + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_ELSE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "} else {\n"); + psContext->indent++; + break; + } + case OPCODE_ENDSWITCH: + case OPCODE_ENDIF: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_CONTINUE: + { + psContext->AddIndentation(); + bcatcstr(glsl, "continue;\n"); + break; + } + case OPCODE_DEFAULT: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "default:\n"); + ++psContext->indent; + break; + } + case OPCODE_NOP: + { + break; + } + case OPCODE_SYNC: + { + const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); +#endif + const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0; + const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0; + + const char* barrierFlags = "mem_flags::mem_none"; + if (sync_threadgroup && sync_device) barrierFlags = "mem_flags::mem_threadgroup | mem_flags::mem_device"; + else if (sync_threadgroup) barrierFlags = "mem_flags::mem_threadgroup"; + else if (sync_device) barrierFlags = "mem_flags::mem_device"; + + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) + { + psContext->AddIndentation(); + bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); + } + else + { + psContext->AddIndentation(); bformata(glsl, "#if __HAVE_SIMDGROUP_BARRIER__\n"); + psContext->AddIndentation(); bformata(glsl, "simdgroup_barrier(%s);\n", barrierFlags); + psContext->AddIndentation(); bformata(glsl, "#else\n"); + psContext->AddIndentation(); bformata(glsl, 
"threadgroup_barrier(%s);\n", barrierFlags); + psContext->AddIndentation(); bformata(glsl, "#endif\n"); + } + + break; + } + case OPCODE_SWITCH: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); +#endif + psContext->AddIndentation(); + bcatcstr(glsl, "switch(int("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")){\n"); + + psContext->indent += 2; + break; + } + case OPCODE_CASE: + { + --psContext->indent; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); +#endif + psContext->AddIndentation(); + + bcatcstr(glsl, "case "); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); + + ++psContext->indent; + break; + } + case OPCODE_EQ: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); +#endif + AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); + break; + } + case OPCODE_USHR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); +#endif + CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_ISHL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp("<<", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ISHR: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); +#endif + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp(">>", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_LD: + case OPCODE_LD_MS: + { + const ResourceBinding* psBinding = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); +#endif + + 
psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); + + if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psInst->eOpcode = OPCODE_LD_UAV_TYPED; + psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + psInst->asOperands[1].iNumComponents = 1; + TranslateShaderStorageLoad(psInst); + break; + } - psContext->AddIndentation(); + if (psInst->bAddressOffset) + { + TranslateTexelFetchOffset(psInst, psBinding, glsl); + } + else + { + TranslateTexelFetch(psInst, psBinding, glsl); + } + break; + } + case OPCODE_DISCARD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); +#endif + psContext->AddIndentation(); + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")==0){discard_fragment();}\n"); + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")!=0){discard_fragment();}\n"); + } + break; + } + case OPCODE_LOD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); +#endif + //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ".write("); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, &numParenthesis); - #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n + //If the core language does not have query-lod feature, + //then the extension is used. The name of the function + //changed between extension and core. 
+ if (HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "textureQueryLod("); + } + else + { + bcatcstr(glsl, "textureQueryLOD("); + } - // unlike glsl, texture arrays will have index in separate argument - const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) - || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); - - uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; - switch (psRes->eDimension) - { - case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - default: - ASSERT(0); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ","); + TranslateTexCoord( + psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], + &psInst->asOperands[1]); + bcatcstr(glsl, ")"); + + //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. + + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. 
+ psInst->asOperands[2].iWriteMaskEnabled = 1; + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); break; - }; + } + case OPCODE_EVAL_CENTROID: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtCentroid("); + //interpolateAtCentroid accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtSample("); + //interpolateAtSample accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SNAPPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtOffset("); + //interpolateAtOffset accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. 
+ glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ".xy);\n"); + break; + } + case OPCODE_LD_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); +#endif + TranslateShaderStorageLoad(psInst); + break; + } + case OPCODE_LD_UAV_TYPED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); +#endif + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; + + const ResourceBinding* psRes = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); + SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + + if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psSrc->aeDataType[0] = srcDataType; + psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) + psSrcAddr->iNumComponents = 1; + TranslateShaderStorageLoad(psInst); + break; + } +#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n + + // unlike glsl, texture arrays will have index in separate argument + const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) + || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); + + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; + switch (psRes->eDimension) + { + case RRD(TEXTURE3D): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): + opMask = 
OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + default: + ASSERT(0); break; + } + + int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, &numParenthesis); + glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".read("); + glsl << TranslateOperand(psSrcAddr, flags, opMask); + if (isArray) + { + // NB cube array is handled incorrectly - it needs extra "face" arg + switch (psRes->eDimension) + { + case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break; + default: ASSERT(0); break; + } + + bcatcstr(glsl, ", "); + glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask); + } + bcatcstr(glsl, ")"); + glsl << TranslateOperandSwizzle(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, 0); + AddAssignPrologue(numParenthesis); + +#undef RRD + + break; + } + case OPCODE_STORE_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } + case OPCODE_STORE_STRUCTURED: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); +#endif + TranslateShaderStorageStore(psInst); + break; + } - glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); - if(isArray) + case OPCODE_STORE_UAV_TYPED: { - // NB cube array is handled incorrectly - it needs extra "face" arg - flags = TO_FLAG_UNSIGNED_INTEGER; + const ResourceBinding* psRes; + int foundResource; + +#ifdef _DEBUG + 
psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); +#endif + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + ASSERT(foundResource); + + if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + psInst->asOperands[1].iNumComponents = 1; + TranslateShaderStorageStore(psInst); + break; + } + + psContext->AddIndentation(); + + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".write("); + + #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n + + // unlike glsl, texture arrays will have index in separate argument + const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) + || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); + + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; switch (psRes->eDimension) { - case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; - case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY):opMask = OPERAND_4_COMPONENT_MASK_Z; break; - case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; - default: ASSERT(0); break; + case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | 
OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + default: + ASSERT(0); + break; } + + glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); bcatcstr(glsl, ", "); glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); - } - bformata(glsl, ");\n"); + if (isArray) + { + // NB cube array is handled incorrectly - it needs extra "face" arg + flags = TO_FLAG_UNSIGNED_INTEGER; + switch (psRes->eDimension) + { + case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + default: ASSERT(0); break; + } + + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); + } + bformata(glsl, ");\n"); #undef RRD - break; - } - case OPCODE_LD_RAW: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_RAW\n"); -#endif - - TranslateShaderStorageLoad(psInst); - break; - } - - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { - TranslateAtomicMemOp(psInst); - break; - } - case OPCODE_UBFE: - case OPCODE_IBFE: - { -#ifdef _DEBUG - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_UBFE) - bcatcstr(glsl, "//OPCODE_UBFE\n"); - else - bcatcstr(glsl, "//OPCODE_IBFE\n"); -#endif - - bool isUBFE = psInst->eOpcode == OPCODE_UBFE; - bool isScalar = 
psInst->asOperands[0].GetNumSwizzleElements() == 1; - - if (isUBFE) - { - if (isScalar) - { - DeclareExtraFunction("UBFE", "\ + break; + } + case OPCODE_LD_RAW: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); +#endif + + TranslateShaderStorageLoad(psInst); + break; + } + + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + TranslateAtomicMemOp(psInst); + break; + } + case OPCODE_UBFE: + case OPCODE_IBFE: + { +#ifdef _DEBUG + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); +#endif + + bool isUBFE = psInst->eOpcode == OPCODE_UBFE; + bool isScalar = psInst->asOperands[0].GetNumSwizzleElements() == 1; + + if (isUBFE) + { + if (isScalar) + { + DeclareExtraFunction("UBFE", "\ uint bitFieldExtractU(uint width, uint offset, uint src);\n\ uint bitFieldExtractU(uint width, uint offset, uint src)\n\ {\n\ - bool isWidthZero = (width == 0);\n\ - bool needsClamp = ((width + offset) < 32);\n\ - uint clampVersion = src << (32-(width+offset));\n\ - clampVersion = clampVersion >> (32 - width);\n\ - uint simpleVersion = src >> offset;\n\ - uint res = select(simpleVersion, clampVersion, needsClamp);\n\ - return select(res, (uint)0, isWidthZero);\n\ +\tbool isWidthZero = (width == 0);\n\ +\tbool needsClamp = ((width + offset) < 32);\n\ +\tuint clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tuint 
simpleVersion = src >> offset;\n\ +\tuint res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, (uint)0, isWidthZero);\n\ }; "); - } - else - { - DeclareExtraFunction("UBFEV", "\ + } + else + { + DeclareExtraFunction("UBFEV", "\ template vec bitFieldExtractU(const vec width, const vec offset, const vec src)\n\ {\n\ - vec isWidthZero = (width == 0);\n\ - vec needsClamp = ((width + offset) < 32);\n\ - vec clampVersion = src << (32-(width+offset));\n\ - clampVersion = clampVersion >> (32 - width);\n\ - vec simpleVersion = src >> offset;\n\ - vec res = select(simpleVersion, clampVersion, needsClamp);\n\ - return select(res, vec(0), isWidthZero);\n\ +\tvec isWidthZero = (width == 0);\n\ +\tvec needsClamp = ((width + offset) < 32);\n\ +\tvec clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tvec simpleVersion = src >> offset;\n\ +\tvec res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, vec(0), isWidthZero);\n\ }; "); - } - } - else - { - if (isScalar) - { - DeclareExtraFunction("IBFE", "\ + } + } + else + { + if (isScalar) + { + DeclareExtraFunction("IBFE", "\ template int bitFieldExtractI(uint width, uint offset, int src)\n\ {\n\ - bool isWidthZero = (width == 0);\n\ - bool needsClamp = ((width + offset) < 32);\n\ - int clampVersion = src << (32-(width+offset));\n\ - clampVersion = clampVersion >> (32 - width);\n\ - int simpleVersion = src >> offset;\n\ - int res = select(simpleVersion, clampVersion, needsClamp);\n\ - return select(res, (int)0, isWidthZero);\n\ +\tbool isWidthZero = (width == 0);\n\ +\tbool needsClamp = ((width + offset) < 32);\n\ +\tint clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tint simpleVersion = src >> offset;\n\ +\tint res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, (int)0, isWidthZero);\n\ }; "); - } - else - { - DeclareExtraFunction("IBFEV", "\ + } + else + 
{ + DeclareExtraFunction("IBFEV", "\ template vec bitFieldExtractI(const vec width, const vec offset, const vec src)\n\ {\n\ - vec isWidthZero = (width == 0);\n\ - vec needsClamp = ((width + offset) < 32);\n\ - vec clampVersion = src << (32-(width+offset));\n\ - clampVersion = clampVersion >> (32 - width);\n\ - vec simpleVersion = src >> offset;\n\ - vec res = select(simpleVersion, clampVersion, needsClamp);\n\ - return select(res, vec(0), isWidthZero);\n\ +\tvec isWidthZero = (width == 0);\n\ +\tvec needsClamp = ((width + offset) < 32);\n\ +\tvec clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tvec simpleVersion = src >> offset;\n\ +\tvec res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, vec(0), isWidthZero);\n\ }; "); - } - } - psContext->AddIndentation(); - - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - AddAssignToDest(&psInst->asOperands[0], isUBFE ? SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); - bcatcstr(glsl, "bitFieldExtract"); - bcatcstr(glsl, isUBFE ? "U" : "I"); - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[3], isUBFE ? 
TO_FLAG_UNSIGNED_INTEGER : TO_FLAG_INTEGER, destMask); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_RCP: - { - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RCP\n"); -#endif - psContext->AddIndentation(); - - SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); - SHADER_VARIABLE_TYPE srcType = psInst->asOperands[1].GetDataType(psContext); - - uint32_t typeFlags = TO_FLAG_NONE; - if (dstType == SVT_FLOAT16 && srcType == SVT_FLOAT16) - { - typeFlags = TO_FLAG_FORCE_HALF; - } - else - srcType = SVT_FLOAT; - - AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); - bcatcstr(glsl, "(1.0) / "); - bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); - bcatcstr(glsl, "("); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_F32TOF16: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); -#endif - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - - bcatcstr(glsl, "as_type(half2("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); - bcatcstr(glsl, ", 0.0))"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_F16TOF32: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - 
-#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); -#endif - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); - - bcatcstr(glsl, "as_type("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); - bcatcstr(glsl, ").x"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_INEG: - { - int numParenthesis = 0; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//INEG\n"); -#endif - //dest = 0 - src0 - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); - - bcatcstr(glsl, "0 - "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTX: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTX\n"); -#endif - CallHelper1("dfdx", psInst, 0, 1, 1); - break; - } - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_DERIV_RTY: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTY\n"); -#endif - CallHelper1("dfdy", psInst, 0, 1, 1); - break; - } - case OPCODE_LRP: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//LRP\n"); -#endif - CallHelper3("mix", psInst, 0, 2, 3, 1, 1); - break; - } - case OPCODE_DP2ADD: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2ADD\n"); -#endif - psContext->AddIndentation(); - bool isFP16 = false; - if (CanForceToHalfOperand(&psInst->asOperands[0]) - && CanForceToHalfOperand(&psInst->asOperands[1]) - && 
CanForceToHalfOperand(&psInst->asOperands[2]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - isFP16 = true; - int parenthesis = 0; - AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis); - - uint32_t flags = TO_AUTO_EXPAND_TO_VEC2; - flags |= isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT; - - bcatcstr(glsl, "dot("); - glsl << TranslateOperand(&psInst->asOperands[1], flags); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], flags); - bcatcstr(glsl, ") + "); - glsl << TranslateOperand(&psInst->asOperands[3], flags); - AddAssignPrologue(parenthesis); - break; - } - case OPCODE_POW: - { - // TODO Check POW opcode whether it actually needs the abs -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//POW\n"); -#endif - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = powr(abs("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, "), "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ");\n"); - break; - } - - case OPCODE_IMM_ATOMIC_ALLOC: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - bcatcstr(glsl, "atomic_fetch_add_explicit("); - glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); - bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed)"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_IMM_ATOMIC_CONSUME: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); - bcatcstr(glsl, "atomic_fetch_sub_explicit("); - glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); - 
// Metal atomic sub returns previous value. Therefore minus one here to get the correct data index. - bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed) - 1"); - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_NOT: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//NOT\n"); -#endif - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); - - bcatcstr(glsl, "~"); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_XOR: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//XOR\n"); -#endif - - CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_RESINFO: - { - - uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t destElem; -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//RESINFO\n"); -#endif - - for (destElem = 0; destElem < destElemCount; ++destElem) - { - GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); - } - - break; - } - - case OPCODE_BUFINFO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//BUFINFO\n"); -#endif - psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, false); // TODO: change this into error after modifying gfx-test 450 - break; - } - - case OPCODE_SAMPLE_INFO: - { -#ifdef _DEBUG - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_INFO\n"); -#endif - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, &numParenthesis); - bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str()); - bcatcstr(glsl, ".get_num_samples()"); - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - case OPCODE_DDIV: - case OPCODE_DRCP: - case OPCODE_MSAD: - case OPCODE_DTOI: - case OPCODE_DTOU: - case OPCODE_ITOD: - case OPCODE_UTOD: - default: - { - ASSERT(0); - break; - } - } - - if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) - { - int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - psContext->AddIndentation(); - bool isFP16 = false; - if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16) - isFP16 = true; - AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, dstCount, &numParenthesis); - bcatcstr(glsl, "clamp("); - - glsl << TranslateOperand(&psInst->asOperands[0], isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT); - if(isFP16) - bcatcstr(glsl, ", 0.0h, 1.0h)"); - else - bcatcstr(glsl, ", 0.0f, 1.0f)"); - AddAssignPrologue(numParenthesis); - } + } + } + psContext->AddIndentation(); + + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[3].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[2].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[1].GetNumSwizzleElements(destMask); + uint32_t ui32Flags = 0; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + AddAssignToDest(&psInst->asOperands[0], isUBFE ? 
SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), &numParenthesis); + bcatcstr(glsl, "bitFieldExtract"); + bcatcstr(glsl, isUBFE ? "U" : "I"); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[1], ui32Flags | TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], ui32Flags | TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], ui32Flags | (isUBFE ? TO_FLAG_UNSIGNED_INTEGER : TO_FLAG_INTEGER), destMask); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_RCP: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); +#endif + psContext->AddIndentation(); + + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + SHADER_VARIABLE_TYPE srcType = psInst->asOperands[1].GetDataType(psContext); + + uint32_t typeFlags = TO_FLAG_NONE; + if (dstType == SVT_FLOAT16 && srcType == SVT_FLOAT16) + { + typeFlags = TO_FLAG_FORCE_HALF; + } + else + srcType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); + bcatcstr(glsl, "(1.0) / "); + bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); + bcatcstr(glsl, "("); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_F32TOF16: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); +#endif + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + 
psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + + bcatcstr(glsl, "as_type(half2("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); + bcatcstr(glsl, ", 0.0))"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_F16TOF32: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); +#endif + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, &numParenthesis); + + bcatcstr(glsl, "as_type("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); + bcatcstr(glsl, ").x"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_INEG: + { + int numParenthesis = 0; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); +#endif + //dest = 0 - src0 + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "0 - "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTX: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); +#endif + CallHelper1("dfdx", psInst, 0, 1, 1); + break; + } + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTY: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); +#endif + 
CallHelper1("dfdy", psInst, 0, 1, 1); + break; + } + case OPCODE_LRP: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); +#endif + CallHelper3("mix", psInst, 0, 2, 3, 1, 1); + break; + } + case OPCODE_DP2ADD: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); +#endif + psContext->AddIndentation(); + bool isFP16 = false; + if (CanForceToHalfOperand(&psInst->asOperands[0]) + && CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + isFP16 = true; + int parenthesis = 0; + AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, &parenthesis); + + uint32_t flags = TO_AUTO_EXPAND_TO_VEC2; + flags |= isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT; + + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], flags); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], flags); + bcatcstr(glsl, ") + "); + glsl << TranslateOperand(&psInst->asOperands[3], flags); + AddAssignPrologue(parenthesis); + break; + } + case OPCODE_POW: + { + // TODO Check POW opcode whether it actually needs the abs +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); +#endif + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = powr(abs("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ");\n"); + break; + } + + case OPCODE_IMM_ATOMIC_ALLOC: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + bcatcstr(glsl, "atomic_fetch_add_explicit("); + glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); 
+ bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed)"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_IMM_ATOMIC_CONSUME: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, &numParenthesis); + bcatcstr(glsl, "atomic_fetch_sub_explicit("); + glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); + // Metal atomic sub returns previous value. Therefore minus one here to get the correct data index. + bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed) - 1"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_NOT: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); +#endif + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), &numParenthesis); + + bcatcstr(glsl, "~"); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_XOR: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); +#endif + + CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_RESINFO: + { + uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t destElem; +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); +#endif + + for (destElem = 0; destElem < destElemCount; ++destElem) + { + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + } + + break; + } + + case OPCODE_BUFINFO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); +#endif + psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. 
Pass the size to shader as const instead.\n", 0, false); // TODO: change this into error after modifying gfx-test 450 + break; + } + + case OPCODE_SAMPLE_INFO: + { +#ifdef _DEBUG + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_INFO\n"); +#endif + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, &numParenthesis); + bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str()); + bcatcstr(glsl, ".get_num_samples()"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DDIV: + case OPCODE_DRCP: + case OPCODE_MSAD: + case OPCODE_DTOI: + case OPCODE_DTOU: + case OPCODE_ITOD: + case OPCODE_UTOD: + default: + { + ASSERT(0); + break; + } + } + + if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) + { + int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + psContext->AddIndentation(); + bool isFP16 = false; + if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16) + isFP16 = true; + AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, dstCount, &numParenthesis); + bcatcstr(glsl, "clamp("); + + glsl << TranslateOperand(&psInst->asOperands[0], isFP16 ? 
TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT); + if (isFP16) + bcatcstr(glsl, ", 0.0h, 1.0h)"); + else + bcatcstr(glsl, ", 0.0f, 1.0f)"); + AddAssignPrologue(numParenthesis); + } } diff --git a/src/toMetalOperand.cpp b/src/toMetalOperand.cpp index 63531e1..0c55d7c 100644 --- a/src/toMetalOperand.cpp +++ b/src/toMetalOperand.cpp @@ -31,303 +31,299 @@ using namespace HLSLcc; // Returns nonzero if types are just different precisions of the same underlying type static bool AreTypesCompatibleMetal(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) { - SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); - if (a == b) - return true; + if (a == b) + return true; - // Special case for array indices: both uint and int are fine - if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && - (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) - return true; + // Special case for array indices: both uint and int are fine + if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && + (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) + return true; - return false; + return false; } std::string ToMetal::TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot /*= true*/) { - std::ostringstream oss; - uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); - if(psOperand->eType == OPERAND_TYPE_INPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar inputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - else - { - if 
((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - } - if (psOperand->eType == OPERAND_TYPE_OUTPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar outputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - } - - if(psOperand->iWriteMaskEnabled && - psOperand->iNumComponents != 1) - { - //Component Mask - if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t mask; - if (psOperand->ui32CompMask != 0) - mask = psOperand->ui32CompMask & ui32ComponentMask; - else - mask = ui32ComponentMask; - - if(mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) - { - if (includeDot) + std::ostringstream oss; + uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar inputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + else + { + if 
((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + } + if (psOperand->eType == OPERAND_TYPE_OUTPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar outputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + } + + if (psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask; + if (psOperand->ui32CompMask != 0) + mask = psOperand->ui32CompMask & ui32ComponentMask; + else + mask = ui32ComponentMask; + + if (mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) + { + if (includeDot) oss << "."; - if(mask & OPERAND_4_COMPONENT_MASK_X) - { - ASSERT(iRebase == 0); - oss << "x"; - } - if(mask & OPERAND_4_COMPONENT_MASK_Y) - { - ASSERT(iRebase <= 1); - oss << "xy"[1 - iRebase]; - } - if(mask & OPERAND_4_COMPONENT_MASK_Z) - { - ASSERT(iRebase <= 2); - oss << "xyz"[2 - iRebase]; - } - if(mask & OPERAND_4_COMPONENT_MASK_W) - { - ASSERT(iRebase <= 3); - oss << "xyzw"[3 - iRebase]; - } - } - } - else - //Component Swizzle - if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || - !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && - 
psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && - psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && - psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W - ) - ) - { - uint32_t i; + if (mask & OPERAND_4_COMPONENT_MASK_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + if (mask & OPERAND_4_COMPONENT_MASK_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + if (mask & OPERAND_4_COMPONENT_MASK_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + if (mask & OPERAND_4_COMPONENT_MASK_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + else + //Component Swizzle + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || + !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && + psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && + psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && + psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W + ) + ) + { + uint32_t i; if (includeDot) oss << "."; - for (i = 0; i < 4; ++i) - { - if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) - continue; - - if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - oss << "x"; - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - oss << "xy"[1 - iRebase]; - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - oss << "xyz"[2 - iRebase]; - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - oss << "xyzw"[3 - iRebase]; - } - } - } - } - else - if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case - { + for (i = 0; i < 4; ++i) + { + if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) + continue; + + if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + else if (psOperand->aui32Swizzle[i] 
== OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + } + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case + { if (includeDot) oss << "."; - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - oss << "x"; - } - else - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - oss << "xy"[1 - iRebase]; - } - else - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - oss << "xyz"[2 - iRebase]; - } - else - if(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - oss << "xyzw"[3 - iRebase]; - } - } - } - return oss.str(); + if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + return oss.str(); } std::string ToMetal::TranslateOperandIndex(const Operand* psOperand, int index) { - int i = index; - std::ostringstream oss; - ASSERT(index < psOperand->iIndexDims); - - switch(psOperand->eIndexRep[i]) - { - case OPERAND_INDEX_IMMEDIATE32: - { - oss << "[" << psOperand->aui32ArraySizes[i] << "]"; - return oss.str(); - } - case OPERAND_INDEX_RELATIVE: - { - oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << "]"; - return oss.str(); - } - case 
OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << " + "<< psOperand->aui32ArraySizes[i] <<"]"; - return oss.str(); - } - default: - { - ASSERT(0); - return ""; - break; - } - } + int i = index; + std::ostringstream oss; + ASSERT(index < psOperand->iIndexDims); + + switch (psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + oss << "[" << psOperand->aui32ArraySizes[i] << "]"; + return oss.str(); + } + case OPERAND_INDEX_RELATIVE: + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << "]"; + return oss.str(); + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << " + " << psOperand->aui32ArraySizes[i] << "]"; + return oss.str(); + } + default: + { + ASSERT(0); + return ""; + break; + } + } } /*static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) { - if (psContext->psShader->eTargetLanguage == LANG_METAL) - { - std::ostringstream oss; - oss << "as_type<"; - oss << GetConstructorForTypeMetal(to, numComponents); - oss << ">"; - return oss.str(); - } - else - { - if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) - return "intBitsToFloat"; - else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) - return "uintBitsToFloat"; - else if (to == SVT_INT && from == SVT_FLOAT) - return "floatBitsToInt"; - else if (to == SVT_UINT && from == SVT_FLOAT) - return "floatBitsToUint"; - } - - ASSERT(0); - return "ERROR missing components in GetBitcastOp()"; + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + std::ostringstream oss; + oss << "as_type<"; + oss << GetConstructorForTypeMetal(to, numComponents); + oss << ">"; + return oss.str(); + } + else + { + if ((to == SVT_FLOAT || to == 
SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) + return "intBitsToFloat"; + else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) + return "uintBitsToFloat"; + else if (to == SVT_INT && from == SVT_FLOAT) + return "floatBitsToInt"; + else if (to == SVT_UINT && from == SVT_FLOAT) + return "floatBitsToUint"; + } + + ASSERT(0); + return "ERROR missing components in GetBitcastOp()"; }*/ // Helper function to print floats with full precision static std::string printFloat(float f) { - char temp[30]; + char temp[30]; - snprintf(temp, 30, "%.9g", f); - char * ePos = strchr(temp, 'e'); - char * pointPos = strchr(temp, '.'); + snprintf(temp, 30, "%.9g", f); + char * ePos = strchr(temp, 'e'); + char * pointPos = strchr(temp, '.'); - if (ePos == NULL && pointPos == NULL && !fpcheck(f)) - return std::string(temp) + ".0"; - else - return std::string(temp); + if (ePos == NULL && pointPos == NULL && !fpcheck(f)) + return std::string(temp) + ".0"; + else + return std::string(temp); } // Helper function to print out a single 32-bit immediate value in desired format static std::string printImmediate32(uint32_t value, SHADER_VARIABLE_TYPE eType) { - std::ostringstream oss; - int needsParenthesis = 0; - - // Print floats as bit patterns. 
- if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && fpcheck(*((float *)(&value)))) - { - oss << "as_type("; - eType = SVT_INT; - needsParenthesis = 1; - } - - switch (eType) - { - default: - ASSERT(0); - case SVT_INT: - case SVT_INT16: - case SVT_INT12: - // Need special handling for anything >= uint 0x3fffffff - if (value > 0x3ffffffe) - oss << "int(0x" << std::hex << value << "u)"; - else - oss << "0x" << std::hex << value << ""; - break; - case SVT_UINT: - case SVT_UINT16: - oss << "0x" << std::hex << value << "u"; - break; - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - oss << printFloat(*((float *)(&value))); - break; - case SVT_BOOL: - if (value == 0) - oss << "false"; - else - oss << "true"; - } - if (needsParenthesis) - oss << ")"; - - return oss.str(); + std::ostringstream oss; + int needsParenthesis = 0; + + // Print floats as bit patterns. + if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && fpcheck(*((float *)(&value)))) + { + oss << "as_type("; + eType = SVT_INT; + needsParenthesis = 1; + } + + switch (eType) + { + default: + ASSERT(0); + case SVT_INT: + case SVT_INT16: + case SVT_INT12: + // Need special handling for anything >= uint 0x3fffffff + if (value > 0x3ffffffe) + oss << "int(0x" << std::hex << value << "u)"; + else + oss << "0x" << std::hex << value << ""; + break; + case SVT_UINT: + case SVT_UINT16: + oss << "0x" << std::hex << value << "u"; + break; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + oss << printFloat(*((float *)(&value))); + break; + case SVT_BOOL: + if (value == 0) + oss << "false"; + else + oss << "true"; + } + if (needsParenthesis) + oss << ")"; + + return oss.str(); } static std::string MakeCBVarName(const std::string &cbName, const std::string &fullName, bool isUnityInstancingBuffer) @@ -342,364 +338,376 @@ static std::string MakeCBVarName(const std::string &cbName, const std::string &f std::string ToMetal::TranslateVariableName(const Operand* 
psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) { - std::ostringstream oss; - int numParenthesis = 0; - int hasCtor = 0; - int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them - SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); - SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); - int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); - int requestedComponents = 0; - int scalarWithSwizzle = 0; - - *pui32IgnoreSwizzle = 0; - - if (psOperand->eType == OPERAND_TYPE_TEMP) - { - // Check for scalar - if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - scalarWithSwizzle = 1; // Going to need a constructor - } - } - - if (psOperand->eType == OPERAND_TYPE_INPUT) - { - // Check for scalar - // You would think checking would be easy but there is a caveat: - // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved - // as an example consider we have input: - // float2 x; float y; - // and later on we do - // tex2D(xxx, fixed2(x.x, y)); - // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" - // so we may end up with treating it as scalar (even though it is vector now) - const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; - const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; - - const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; - if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) - { - scalarWithSwizzle = 1; - *pui32IgnoreSwizzle = 1; - } - } - - 
if (piRebase) - *piRebase = 0; - - if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) - requestedComponents = 2; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) - requestedComponents = 3; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) - requestedComponents = 4; - - requestedComponents = std::max(requestedComponents, numComponents); - - if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) - { - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - // Mark the operand type to match whatever we're asking for in the flags. - ((Operand *)psOperand)->aeDataType[0] = requestedType; - ((Operand *)psOperand)->aeDataType[1] = requestedType; - ((Operand *)psOperand)->aeDataType[2] = requestedType; - ((Operand *)psOperand)->aeDataType[3] = requestedType; - } - - bool bitcast = false; - if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0) - { - if (CanDoDirectCast(psContext, eType, requestedType)) - { - oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; - numParenthesis++; - hasCtor = 1; - if (eType == SVT_BOOL) - needsBoolUpscale = 1; - } - else - { - // Direct cast not possible, need to do bitcast. - oss << "as_type<"<< GetConstructorForTypeMetal(requestedType, requestedComponents) << ">("; - hasCtor = 1; - bitcast = true; - numParenthesis++; - } - } - - // Add ctor if needed (upscaling). 
Type conversion is already handled above, so here we must - // use the original type to not make type conflicts in bitcasts - bool needsUpscaling = ((numComponents < requestedComponents)||(scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast); - - // Add constuctor if half precision is forced to avoid template ambiguity error from compiler - bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64); - - if (needsForcedCtor) - requestedComponents = std::max(requestedComponents, 1); - - if (needsUpscaling || needsForcedCtor) - { - oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; - - numParenthesis++; - hasCtor = 1; - } - } - - - switch(psOperand->eType) - { - case OPERAND_TYPE_IMMEDIATE32: - { - if(psOperand->iNumComponents == 1) - { - oss << printImmediate32(*((unsigned int*)(&psOperand->afImmediates[0])), requestedType); - } - else - { - int i; - int firstItemAdded = 0; - if (hasCtor == 0) - { - oss << GetConstructorForTypeMetal(requestedType, requestedComponents) << "("; - numParenthesis++; - hasCtor = 1; - } - for (i = 0; i < 4; i++) - { - uint32_t uval; - if (!(ui32CompMask & (1 << i))) - continue; - - if (firstItemAdded) - oss << ", "; - uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents-1 : i])); - oss << printImmediate32(uval, requestedType); - firstItemAdded = 1; - } - oss << ")"; - *pui32IgnoreSwizzle = 1; - numParenthesis--; - } - break; - } - case OPERAND_TYPE_IMMEDIATE64: - { - ASSERT(0); // doubles not supported on Metal - break; - } - case OPERAND_TYPE_INPUT: - { - int regSpace = psOperand->GetRegisterSpace(psContext); - switch(psOperand->iIndexDims) - { - case INDEX_2D: - { - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) - { - oss << "input.cp"; - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, 1, pui32IgnoreSwizzle); - } - else - { - // Not sure if this codepath is active outside hull/domain - oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - } - break; - } - default: - { - if(psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) - { - ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0); - oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber << "["; - oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - oss << "]"; - } - else - { - if(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) - { - const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; - oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << parentIndex << "[" << (psOperand->ui32RegisterNumber - parentIndex) << "]"; - } - else - { - oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); 
- } - } - break; - } - } - break; - } - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_OUTPUT_DEPTH: - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - - int stream = 0; - oss << psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); - if (psOperand->m_SubOperands[0].get()) - { - oss << "["; - oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); - oss << "]"; - } - break; - } - case OPERAND_TYPE_TEMP: - { - SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); - oss << HLSLCC_TEMP_PREFIX; - ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. - switch (eTempType) - { - case SVT_FLOAT: - ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); - if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT16: - ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("16_"); - if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT10: - ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("10_"); - if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT: - ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("i"); - if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT16: - ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("i16_"); - if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - 
*pui32IgnoreSwizzle = 1; - break; - case SVT_INT12: - ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("i12_"); - if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT: - ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("u"); - if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT16: - ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("u16_"); - if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_DOUBLE: - ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("d"); - if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_BOOL: - ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("b"); - if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - default: - ASSERT(0 && "Should never get here!"); - } - oss << psOperand->ui32RegisterNumber; - break; - } - case OPERAND_TYPE_SPECIAL_IMMCONSTINT: - case OPERAND_TYPE_SPECIAL_IMMCONST: - case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: - case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: - case OPERAND_TYPE_SPECIAL_FOG: - case OPERAND_TYPE_SPECIAL_ADDRESS: - case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: - case OPERAND_TYPE_SPECIAL_TEXCOORD: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPERAND_TYPE_SPECIAL_POSITION: - { - ASSERT(0 && "TODO normal shader support"); -// bcatcstr(glsl, "gl_Position"); - break; - } - case OPERAND_TYPE_SPECIAL_POINTSIZE: - 
{ - ASSERT(0 && "TODO normal shader support"); - // bcatcstr(glsl, "gl_PointSize"); - break; - } - case OPERAND_TYPE_CONSTANT_BUFFER: - { - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t index = -1; - std::vector arrayIndices; - bool isArray = false; - bool isFBInput = false; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - ASSERT(psCBuf != NULL); - - if(ui32TOFlag & TO_FLAG_DECLARATION_NAME) - { - pui32IgnoreSwizzle[0] = 1; - } - std::string cbName = ""; - if(psCBuf) - { - //$Globals. - cbName = GetCBName(psCBuf->name); - cbName += "."; - // Drop the constant buffer name from subpass inputs - if (cbName.substr(0, 19) == "hlslcc_SubpassInput") - cbName = ""; - } - - if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) - { - //Work out the variable name. Don't apply swizzle to that variable yet. - int32_t rebase = 0; - - ASSERT(psCBuf != NULL); - - uint32_t componentsNeeded = 1; - if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) - { - uint32_t minSwiz = 3; - uint32_t maxSwiz = 0; - int i; - for (i = 0; i < 4; i++) - { - if ((ui32CompMask & (1 << i)) == 0) - continue; - minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); - maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); - } - componentsNeeded = maxSwiz - minSwiz + 1; - } + std::ostringstream oss; + int numParenthesis = 0; + int hasCtor = 0; + int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them + SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); + int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); + int requestedComponents = 0; + int scalarWithSwizzle = 0; + + *pui32IgnoreSwizzle = 0; + + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + // Check for scalar + if (psContext->psShader->GetTempComponentCount(eType, 
psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; // Going to need a constructor + } + } + + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + // Check for scalar + // You would think checking would be easy but there is a caveat: + // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved + // as an example consider we have input: + // float2 x; float y; + // and later on we do + // tex2D(xxx, fixed2(x.x, y)); + // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" + // so we may end up with treating it as scalar (even though it is vector now) + const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; + const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; + + const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; + if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) + { + scalarWithSwizzle = 1; + *pui32IgnoreSwizzle = 1; + } + } + + if (piRebase) + *piRebase = 0; + + if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) + requestedComponents = 2; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) + requestedComponents = 3; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) + requestedComponents = 4; + + requestedComponents = std::max(requestedComponents, numComponents); + + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) + { + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + // Mark the operand type to match whatever we're asking for in the flags. 
+ ((Operand *)psOperand)->aeDataType[0] = requestedType; + ((Operand *)psOperand)->aeDataType[1] = requestedType; + ((Operand *)psOperand)->aeDataType[2] = requestedType; + ((Operand *)psOperand)->aeDataType[3] = requestedType; + } + + bool bitcast = false; + if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0) + { + if (CanDoDirectCast(psContext, eType, requestedType)) + { + hasCtor = 1; + if (eType == SVT_BOOL) + { + needsBoolUpscale = 1; + // make sure to wrap the whole thing in parens so the upscale + // multiply only applies to the bool + oss << "("; + numParenthesis++; + } + oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; + numParenthesis++; + } + else + { + // Direct cast not possible, need to do bitcast. + oss << "as_type<" << GetConstructorForTypeMetal(requestedType, requestedComponents) << ">("; + hasCtor = 1; + bitcast = true; + numParenthesis++; + } + } + + // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must + // use the original type to not make type conflicts in bitcasts + bool needsUpscaling = ((numComponents < requestedComponents) || (scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast); + + // Add constuctor if half precision is forced to avoid template ambiguity error from compiler + bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64); + + if (needsForcedCtor) + requestedComponents = std::max(requestedComponents, 1); + + if (needsUpscaling || needsForcedCtor) + { + oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; + + numParenthesis++; + hasCtor = 1; + } + } + + + switch (psOperand->eType) + { + case OPERAND_TYPE_IMMEDIATE32: + { + if (psOperand->iNumComponents == 1) + { + oss << printImmediate32(*((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + } + else + { + int i; + int firstItemAdded = 0; + if (hasCtor == 0) + 
{ + oss << GetConstructorForTypeMetal(requestedType, requestedComponents) << "("; + numParenthesis++; + hasCtor = 1; + } + for (i = 0; i < 4; i++) + { + uint32_t uval; + if (!(ui32CompMask & (1 << i))) + continue; + + if (firstItemAdded) + oss << ", "; + uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? psOperand->iNumComponents - 1 : i])); + oss << printImmediate32(uval, requestedType); + firstItemAdded = 1; + } + oss << ")"; + *pui32IgnoreSwizzle = 1; + numParenthesis--; + } + break; + } + case OPERAND_TYPE_IMMEDIATE64: + { + ASSERT(0); // doubles not supported on Metal + break; + } + case OPERAND_TYPE_INPUT: + { + int regSpace = psOperand->GetRegisterSpace(psContext); + switch (psOperand->iIndexDims) + { + case INDEX_2D: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + oss << "input.cp"; + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + oss << "." 
<< psContext->GetDeclaredInputName(psOperand, piRebase, 1, pui32IgnoreSwizzle); + } + else + { + // Not sure if this codepath is active outside hull/domain + oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + } + break; + } + default: + { + if (psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0); + oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber << "["; + oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + oss << "]"; + } + else + { + if (psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) + { + const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; + oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << parentIndex << "[" << (psOperand->ui32RegisterNumber - parentIndex) << "]"; + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + } + } + break; + } + } + break; + } + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + int stream = 0; + oss << psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + if (psOperand->m_SubOperands[0].get()) + { + oss << "["; + oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); + oss << "]"; + } + break; + } + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); + + if (psOperand->eSpecialName == NAME_UNDEFINED && psOperand->specialName.length()) + { + oss << psOperand->specialName; + break; + } + + oss << HLSLCC_TEMP_PREFIX; + ASSERT(psOperand->ui32RegisterNumber < 0x10000); // 
Sanity check after temp splitting. + switch (eTempType) + { + case SVT_FLOAT: + ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT16: + ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("16_"); + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT10: + ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("10_"); + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT: + ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i"); + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT16: + ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i16_"); + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT12: + ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i12_"); + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT: + ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("u"); + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT16: + ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("u16_"); + if 
(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_DOUBLE: + ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("d"); + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_BOOL: + ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("b"); + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + default: + ASSERT(0 && "Should never get here!"); + } + oss << psOperand->ui32RegisterNumber; + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONSTINT: + case OPERAND_TYPE_SPECIAL_IMMCONST: + case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: + case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: + case OPERAND_TYPE_SPECIAL_FOG: + case OPERAND_TYPE_SPECIAL_ADDRESS: + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + case OPERAND_TYPE_SPECIAL_TEXCOORD: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPERAND_TYPE_SPECIAL_POSITION: + { + ASSERT(0 && "TODO normal shader support"); +// bcatcstr(glsl, "gl_Position"); + break; + } + case OPERAND_TYPE_SPECIAL_POINTSIZE: + { + ASSERT(0 && "TODO normal shader support"); + // bcatcstr(glsl, "gl_PointSize"); + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t index = -1; + std::vector arrayIndices; + bool isArray = false; + bool isFBInput = false; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ASSERT(psCBuf != NULL); + + if (ui32TOFlag & TO_FLAG_DECLARATION_NAME) + { + pui32IgnoreSwizzle[0] = 1; + } + std::string cbName = ""; + if (psCBuf) + { + //$Globals. 
+ cbName = GetCBName(psCBuf->name); + cbName += "."; + // Drop the constant buffer name from subpass inputs + if (cbName.substr(0, 19) == "hlslcc_SubpassInput") + cbName = ""; + } + + if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + //Work out the variable name. Don't apply swizzle to that variable yet. + int32_t rebase = 0; + + ASSERT(psCBuf != NULL); + + uint32_t componentsNeeded = 1; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + int i; + for (i = 0; i < 4; i++) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) // We have to pull down the swizzle array to match the first bit that's actually set @@ -731,88 +739,88 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui dynamicIndexStr = TranslateOperand(psDynIndexOp, opFlags, 0x1); // Just take the first component for the index } - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) - { - // Simple case: just access one component - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - // Special hack for MSAA subpass inputs: in Metal we can only read the "current" sample, so ignore the index - if (strncmp(fullName.c_str(), "hlslcc_fbinput", 14) == 0) - isFBInput = true; - - if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) - { - // We'll need to add the prefix only to the last section of the name - size_t 
commaPos = fullName.find_last_of('.'); - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); - if (commaPos == std::string::npos) - fullName.insert(0, prefix); - else - fullName.insert(commaPos + 1, prefix); - } + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + // Special hack for MSAA subpass inputs: in Metal we can only read the "current" sample, so ignore the index + if (strncmp(fullName.c_str(), "hlslcc_fbinput", 14) == 0) + isFBInput = true; + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + } oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); - } - else - { - // Non-simple case: build vec4 and apply mask - uint32_t i; - int32_t tmpRebase; - std::vector tmpArrayIndices; - bool tmpIsArray; - int firstItemAdded = 0; - - oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; - for (i = 0; i < 4; i++) - { - const ShaderVarType *tmpVarType = NULL; - if ((ui32CompMask & (1 << i)) == 0) - continue; - tmpRebase = 0; - if (firstItemAdded != 0) - oss << ", "; - else - firstItemAdded = 1; - - uint32_t tmpSwizzle[4] = { 0 }; - std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - 
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + } + else + { + // Non-simple case: build vec4 and apply mask + uint32_t i; + int32_t tmpRebase; + std::vector tmpArrayIndices; + bool tmpIsArray; + int firstItemAdded = 0; + + oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + oss << ", "; + else + firstItemAdded = 1; + + uint32_t tmpSwizzle[4] = { 0 }; + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); - if (tmpVarType->Class != SVC_SCALAR) - { - uint32_t swizzle; - tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 - swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + if (tmpVarType->Class != SVC_SCALAR) + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - oss << "." << ("xyzw"[swizzle]); - } - } - oss << ")"; - // Clear rebase, we've already done it. - rebase = 0; - // Also swizzle. - *pui32IgnoreSwizzle = 1; - } + oss << "." << ("xyzw"[swizzle]); + } + } + oss << ")"; + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. 
+ *pui32IgnoreSwizzle = 1; + } - if (isArray) - { - index = arrayIndices.back(); + if (isArray) + { + index = arrayIndices.back(); // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); - // Ignore index altogether on fb inputs - if (isFBInput) - { - // Nothing to do here - } - else if (hasDynamicIndex || hasImmediateIndex) + // Ignore index altogether on fb inputs + if (isFBInput) + { + // Nothing to do here + } + else if (hasDynamicIndex || hasImmediateIndex) { std::ostringstream fullIndexOss; if (hasDynamicIndex && hasImmediateIndex) @@ -830,425 +838,440 @@ std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui } else // This path is atm the default { - oss << "[" << fullIndexOss.str() << "]"; + oss << "[" << fullIndexOss.str() << "]"; } } } - if(psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) - { - switch(rebase) - { - case 4: - { - if(psVarType->Columns == 2) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) - oss << ".xxyx"; - } - else if(psVarType->Columns == 3) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) - oss << ".xxyz"; - } - break; - } - case 8: - { - if(psVarType->Columns == 2) - { - //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) - oss << ".xxxy"; - } - break; - } - case 0: - default: - { - //No rebase, but extend to vec4. - if(psVarType->Columns == 2) - { - oss << ".xyxx"; - } - else if(psVarType->Columns == 3) - { - oss << ".xyzx"; - } - break; - } - - } - } - - if(psVarType && psVarType->Class == SVC_SCALAR) - { - *pui32IgnoreSwizzle = 1; - - // CB arrays are all declared as 4-component vectors to match DX11 data layout. - // Therefore add swizzle here to access the element corresponding to the scalar var. 
- if ((psVarType->Elements > 0) && (psContext->psShader->eShaderType == COMPUTE_SHADER)) - { - oss << ".x"; - } - } - } - break; - } - case OPERAND_TYPE_RESOURCE: - { - oss << ResourceName(RGROUP_TEXTURE, psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_SAMPLER: - { - oss << ResourceName(RGROUP_SAMPLER, psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_FUNCTION_BODY: - { - ASSERT(0); - break; - } - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: - { - oss << "phaseInstanceID"; // Not a real builtin, but passed as a function parameter. - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: - { - oss << "ImmCB_" << psContext->currentPhase; - oss << TranslateOperandIndex(psOperand, 0); - break; - } - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - { - oss << "mtl_TessCoord"; - break; - } - case OPERAND_TYPE_INPUT_CONTROL_POINT: - { - int ignoreRedirect = 1; - int regSpace = psOperand->GetRegisterSpace(psContext); - - if ((regSpace == 0 && psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) || - (regSpace == 1 && psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) - { - ignoreRedirect = 0; - } - - if (ignoreRedirect) - { - oss << "input.cp"; - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - oss << "." 
<< psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); - } - else - { - oss << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - } - - // Check for scalar - if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_NULL: - { - // Null register, used to discard results of operations - oss << "//null"; - break; - } - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - { - oss << "controlPointID"; - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - oss << "mtl_CoverageMask"; - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_COVERAGE_MASK: - { - oss << "mtl_CoverageMask"; - //Skip swizzle on scalar types. - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID - { - oss << "mtl_ThreadID"; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID - { - oss << "mtl_ThreadIDInGroup"; - break; - } - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID - { - oss << "mtl_ThreadGroupID"; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex - { - oss << "mtl_ThreadIndexInThreadGroup"; - *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. 
- break; - } - case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: - { - oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: - { - oss << "TGSM" << psOperand->ui32RegisterNumber; - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_PRIMITIVEID: - { - // Not supported on Metal - ASSERT(0); - break; - } - case OPERAND_TYPE_INDEXABLE_TEMP: - { - oss << "TempArray" << psOperand->aui32ArraySizes[0] << "["; - if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) - oss << psOperand->aui32ArraySizes[1]; - - if(psOperand->m_SubOperands[1].get()) - { - if (psOperand->aui32ArraySizes[1] != 0) - oss << "+"; - oss << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); - - } - oss << "]"; - break; - } - case OPERAND_TYPE_STREAM: - { - // Not supported on Metal - ASSERT(0); - break; - } - case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: - { - // Not supported on Metal - ASSERT(0); - break; - } - case OPERAND_TYPE_THIS_POINTER: - { - ASSERT(0); // Nope. 
- break; - } - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - { - const ShaderInfo::InOutSignature* psIn; - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); - *piRebase = psIn->iRebase; - switch (psIn->eSystemValueType) - { - case NAME_POSITION: - oss << "mtl_Position"; - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - oss << "mtl_Layer"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_CLIP_DISTANCE: - // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes - char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, psIn->ui32SemanticIndex); - oss << tmpName; - *pui32IgnoreSwizzle = 1; - break; - case NAME_VIEWPORT_ARRAY_INDEX: - oss << "mtl_ViewPortIndex"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_VERTEX_ID: - oss << "mtl_VertexID"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_INSTANCE_ID: - oss << "mtl_InstanceID"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_IS_FRONT_FACE: - oss << "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_PRIMITIVE_ID: - // Not on Metal - ASSERT(0); - break; - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - oss << "edgeTessellationFactor"; - else - oss << "edgeTessellationFactor[0]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - oss << "edgeTessellationFactor[1]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - oss << "edgeTessellationFactor[2]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - oss << "edgeTessellationFactor[3]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - oss << "insideTessellationFactor"; - else - oss << "insideTessellationFactor[0]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - oss << "insideTessellationFactor[1]"; - *pui32IgnoreSwizzle = 1; - break; - default: - const std::string patchPrefix = "patch."; - - if (psContext->psShader->eShaderType == DOMAIN_SHADER) - oss << psContext->inputPrefix << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; - else - oss << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; - - // Disable swizzles if this is a scalar - if (psContext->psShader->eShaderType == HULL_SHADER) - { - if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - else - { - if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & 
psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - break; - } - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (hasCtor && (*pui32IgnoreSwizzle == 0)) - { - oss << TranslateOperandSwizzle(psOperand, ui32CompMask, piRebase ? *piRebase : 0); - *pui32IgnoreSwizzle = 1; - } - - if (needsBoolUpscale) - { - if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) - oss << ") * 0xffffffffu"; - else - oss << ") * int(0xffffffffu)"; - numParenthesis--; - } - - while (numParenthesis != 0) - { - oss << ")"; - numParenthesis--; - } - return oss.str(); + if (psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + { + switch (rebase) + { + case 4: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) + oss << ".xxyx"; + } + else if (psVarType->Columns == 3) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) + oss << ".xxyz"; + } + break; + } + case 8: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) + oss << ".xxxy"; + } + break; + } + case 0: + default: + { + //No rebase, but extend to vec4. + if (psVarType->Columns == 2) + { + oss << ".xyxx"; + } + else if (psVarType->Columns == 3) + { + oss << ".xyzx"; + } + break; + } + } + } + + if (psVarType && psVarType->Class == SVC_SCALAR) + { + *pui32IgnoreSwizzle = 1; + + // CB arrays are all declared as 4-component vectors to match DX11 data layout. + // Therefore add swizzle here to access the element corresponding to the scalar var. 
+ if ((psVarType->Elements > 0) && (psContext->psShader->eShaderType == COMPUTE_SHADER)) + { + oss << ".x"; + } + } + } + break; + } + case OPERAND_TYPE_RESOURCE: + { + oss << ResourceName(RGROUP_TEXTURE, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_SAMPLER: + { + oss << ResourceName(RGROUP_SAMPLER, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_FUNCTION_BODY: + { + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: + { + oss << "phaseInstanceID"; // Not a real builtin, but passed as a function parameter. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + { + oss << "ImmCB_" << psContext->currentPhase; + oss << TranslateOperandIndex(psOperand, 0); + break; + } + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + { + oss << "mtl_TessCoord"; + break; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + int ignoreRedirect = 1; + int regSpace = psOperand->GetRegisterSpace(psContext); + + if ((regSpace == 0 && psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) || + (regSpace == 1 && psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + ignoreRedirect = 0; + } + + if (ignoreRedirect) + { + oss << "input.cp"; + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + oss << "." 
<< psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + } + + // Check for scalar + if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_NULL: + { + // Null register, used to discard results of operations + oss << "//null"; + break; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + oss << "controlPointID"; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + oss << "mtl_CoverageMask"; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + { + oss << "mtl_CoverageMask"; + //Skip swizzle on scalar types. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID + { + oss << "mtl_ThreadID"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID + { + oss << "mtl_ThreadIDInGroup"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID + { + oss << "mtl_ThreadGroupID"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex + { + if (requestedComponents > 1 && !hasCtor) + { + oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; + numParenthesis++; + hasCtor = 1; + } + for (uint32_t i = 0; i < requestedComponents; i++) + { + oss << "mtl_ThreadIndexInThreadGroup"; + if (i < requestedComponents - 1) + oss << ", "; + } + *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. 
+ break; + } + case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: + { + oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: + { + oss << "TGSM" << psOperand->ui32RegisterNumber; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INDEXABLE_TEMP: + { + oss << "TempArray" << psOperand->aui32ArraySizes[0] << "["; + if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) + oss << psOperand->aui32ArraySizes[1]; + + if (psOperand->m_SubOperands[1].get()) + { + if (psOperand->aui32ArraySizes[1] != 0) + oss << "+"; + oss << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + } + oss << "]"; + break; + } + case OPERAND_TYPE_STREAM: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_THIS_POINTER: + { + ASSERT(0); // Nope. 
+ break; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + const ShaderInfo::InOutSignature* psIn; + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + *piRebase = psIn->iRebase; + switch (psIn->eSystemValueType) + { + case NAME_POSITION: + oss << "mtl_Position"; + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + oss << "mtl_Layer"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_CLIP_DISTANCE: + // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes + char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, psIn->ui32SemanticIndex); + oss << tmpName; + *pui32IgnoreSwizzle = 1; + break; + case NAME_VIEWPORT_ARRAY_INDEX: + oss << "mtl_ViewPortIndex"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_VERTEX_ID: + oss << "mtl_VertexID"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_INSTANCE_ID: + oss << "mtl_InstanceID"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_IS_FRONT_FACE: + oss << "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + + // as far as i understand tesselation factors are always coming from tessFactor variable (it is always declared in ToMetal::Translate) + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + oss << "tessFactor.edgeTessellationFactor"; + else + oss << "tessFactor.edgeTessellationFactor[0]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + oss << "tessFactor.edgeTessellationFactor[1]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + oss << "tessFactor.edgeTessellationFactor[2]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + oss << "tessFactor.edgeTessellationFactor[3]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + oss << "tessFactor.insideTessellationFactor"; + else + oss << "tessFactor.insideTessellationFactor[0]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + oss << "tessFactor.insideTessellationFactor[1]"; + *pui32IgnoreSwizzle = 1; + break; + + default: + const std::string patchPrefix = "patch."; + + if (psContext->psShader->eShaderType == DOMAIN_SHADER) + oss << psContext->inputPrefix << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; + else + oss << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; + + // Disable swizzles if this is a scalar + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if 
((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + else + { + if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + break; + } + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (hasCtor && (*pui32IgnoreSwizzle == 0)) + { + oss << TranslateOperandSwizzle(psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBoolUpscale) + { + if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) + oss << ") * 0xffffffffu"; + else + oss << ") * int(0xffffffffu)"; + numParenthesis--; + + oss << ")"; + numParenthesis--; + } + + while (numParenthesis != 0) + { + oss << ")"; + numParenthesis--; + } + return oss.str(); } std::string ToMetal::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) { - std::ostringstream oss; - uint32_t ui32IgnoreSwizzle = 0; - int iRebase = 0; - - // in single-component mode there is no need to use mask - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; - - if(ui32TOFlag & TO_FLAG_NAME_ONLY) - { - return TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); - } - - switch (psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - case OPERAND_MODIFIER_NEG: - { - oss << ("(-"); - break; - } - case OPERAND_MODIFIER_ABS: - { - oss << ("abs("); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - oss << ("-abs("); - break; - } - } - - oss << TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); - - if (!ui32IgnoreSwizzle) - { - oss << TranslateOperandSwizzle(psOperand, ui32ComponentMask, iRebase); - } - - switch (psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - 
case OPERAND_MODIFIER_NEG: - { - oss << (")"); - break; - } - case OPERAND_MODIFIER_ABS: - { - oss << (")"); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - oss << (")"); - break; - } - } - return oss.str(); + std::ostringstream oss; + uint32_t ui32IgnoreSwizzle = 0; + int iRebase = 0; + + // in single-component mode there is no need to use mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; + + if (ui32TOFlag & TO_FLAG_NAME_ONLY) + { + return TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + oss << ("(-"); + break; + } + case OPERAND_MODIFIER_ABS: + { + oss << ("abs("); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + oss << ("-abs("); + break; + } + } + + oss << TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); + + if (!ui32IgnoreSwizzle) + { + oss << TranslateOperandSwizzle(psOperand, ui32ComponentMask, iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + oss << (")"); + break; + } + case OPERAND_MODIFIER_ABS: + { + oss << (")"); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + oss << (")"); + break; + } + } + return oss.str(); }