From e6c4b69779a89608c160a8a09516306c812f8ce0 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Sat, 15 Jan 2022 05:18:37 -0500 Subject: [PATCH 01/28] Adding HDR texture formats. --- src/matoya.h | 1 + src/windows/gfx/d3d11-ctx.c | 2 +- src/windows/gfx/d3d11.c | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 7c763967d..657a6a25f 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -70,6 +70,7 @@ typedef enum { MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. + MTY_COLOR_FORMAT_RGBA16F = 9, ///< 16-bits floating-point precision per channel RGBA. MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, } MTY_ColorFormat; diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index b3aea31d7..a9adea218 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -78,7 +78,7 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) IDXGISwapChain1 *swap_chain1 = NULL; DXGI_SWAP_CHAIN_DESC1 sd = {0}; - sd.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + sd.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; // TODO: Need to make this an input parameter sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; sd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; sd.SampleDesc.Count = 1; diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 26bb771e8..59eee73ed 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -279,10 +279,14 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_BGRA: case MTY_COLOR_FORMAT_AYUV: case MTY_COLOR_FORMAT_BGR565: - case MTY_COLOR_FORMAT_BGRA5551: { + case MTY_COLOR_FORMAT_BGRA5551: + case MTY_COLOR_FORMAT_RGBA16F: { DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_BGR565 ? DXGI_FORMAT_B5G6R5_UNORM : - desc->format == MTY_COLOR_FORMAT_BGRA5551 ? 
DXGI_FORMAT_B5G5R5A1_UNORM : DXGI_FORMAT_B8G8R8A8_UNORM; + desc->format == MTY_COLOR_FORMAT_BGRA5551 ? DXGI_FORMAT_B5G5R5A1_UNORM : + desc->format == MTY_COLOR_FORMAT_RGBA16F ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_AYUV) ? 4 : 2; + if (format == DXGI_FORMAT_R16G16B16A16_FLOAT) + bpp = 8; // BGRA HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format, desc->cropWidth, desc->cropHeight); From b888b8ee03150c9f366a938eef06da77a22744aa Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Fri, 4 Mar 2022 18:03:19 -0500 Subject: [PATCH 02/28] Completed HDR prototype! We can now switch between framebuffer formats and colorspaces on the fly. So far it's working for sRGB and scRGB. Now need to make it work for HDR10 and then polish the implementation. --- src/matoya.h | 69 +++++++++------- src/windows/gfx/d3d11-ctx.c | 104 ++++++++++++++++++++++++- src/windows/gfx/d3d11.c | 5 +- src/windows/gfx/shaders/d3d11/ps.ps4 | 17 ++++ src/windows/gfx/shaders/d3d11/psui.ps4 | 1 + 5 files changed, 163 insertions(+), 33 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 657a6a25f..252c24dc7 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -58,22 +58,32 @@ typedef enum { /// @brief Raw image color formats. typedef enum { - MTY_COLOR_FORMAT_UNKNOWN = 0, ///< Unknown color format. - MTY_COLOR_FORMAT_BGRA = 1, ///< 8-bits per channel BGRA. - MTY_COLOR_FORMAT_NV12 = 2, ///< 4:2:0 full W/H Y plane followed by an interleaved half - ///< W/H UV plane. - MTY_COLOR_FORMAT_I420 = 3, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane - ///< followed by a half W/H V plane. - MTY_COLOR_FORMAT_I444 = 4, ///< 4:4:4 full W/H consecutive Y, U, V planes. - MTY_COLOR_FORMAT_NV16 = 5, ///< 4:2:2 full W/H Y plane followed by an interleaved half W - ///< full H UV plane. - MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. 
- MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. - MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. - MTY_COLOR_FORMAT_RGBA16F = 9, ///< 16-bits floating-point precision per channel RGBA. - MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, + MTY_COLOR_FORMAT_UNKNOWN = 0, ///< Unknown color format. + MTY_COLOR_FORMAT_BGRA = 1, ///< 8-bits per channel BGRA. + MTY_COLOR_FORMAT_NV12 = 2, ///< 4:2:0 full W/H Y plane followed by an interleaved half + ///< W/H UV plane. + MTY_COLOR_FORMAT_I420 = 3, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane + ///< followed by a half W/H V plane. + MTY_COLOR_FORMAT_I444 = 4, ///< 4:4:4 full W/H consecutive Y, U, V planes. + MTY_COLOR_FORMAT_NV16 = 5, ///< 4:2:2 full W/H Y plane followed by an interleaved half W + ///< full H UV plane. + MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. + MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. + MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. + MTY_COLOR_FORMAT_RGBA16F = 9, ///< 16-bits floating-point precision per channel RGBA. + MTY_COLOR_FORMAT_RGB10A2 = 10, ///< 10-bits per RGB channels, 2-bit alpha. + MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, } MTY_ColorFormat; +/// @brief Defines the color encoding of the raw image. Note that certain color spaces and color formats are tightly coupled with each other. +typedef enum { + MTY_COLOR_SPACE_UNKNOWN = 0, ///< Unknown color space. + MTY_COLOR_SPACE_SRGB = 1, ///< sRGB/rec709 primaries and a non-linear transfer function (approx gamma curve of 2.2). Supported by all color formats. + MTY_COLOR_SPACE_SCRGB_LINEAR = 2, ///< Microsoft's scRGB wide gamut color space which is based on sRGB/rec709 primaries and has a linear transfer function. Only supported by color format MTY_COLOR_FORMAT_RGBA16F. + MTY_COLOR_SPACE_HDR10 = 3, ///< Uses the rec2020 color primaries and the rec2100 non-linear transfer function (ST 2084 perceptual quantizer, aka PQ). 
Only supported by color format MTY_COLOR_FORMAT_RGB10A2. + MTY_COLOR_SPACE_MAKE_32 = INT32_MAX, +} MTY_ColorSpace; + /// @brief Quad texture filtering. typedef enum { MTY_FILTER_NEAREST = 0, ///< Nearest neighbor filter by the GPU, can cause shimmering. @@ -102,21 +112,22 @@ typedef enum { /// @brief Description of a render operation. typedef struct { - MTY_ColorFormat format; ///< The color format of a raw image. - MTY_Rotation rotation; ///< Rotation applied to the image. - MTY_Filter filter; ///< Filter applied to the image. - MTY_Effect effect; ///< Effect applied to the image. - uint32_t imageWidth; ///< The width in pixels of the image. - uint32_t imageHeight; ///< The height in pixels of the image. - uint32_t cropWidth; ///< Desired crop width of the image from the top left corner. - uint32_t cropHeight; ///< Desired crop height of the image from the top left corner. - uint32_t viewWidth; ///< The width of the viewport. - uint32_t viewHeight; ///< The height of the viewport. - float aspectRatio; ///< Desired aspect ratio of the image. The renderer will letterbox - ///< the image to maintain the specified aspect ratio. - float scale; ///< Multiplier applied to the dimensions of the image, producing an - ///< minimized or magnified image. This can be set to 0 - ///< if unnecessary. + MTY_ColorFormat format; ///< The color format of a raw image. + MTY_ColorSpace colorspace; ///< Defines the color encoding of the image. + MTY_Rotation rotation; ///< Rotation applied to the image. + MTY_Filter filter; ///< Filter applied to the image. + MTY_Effect effect; ///< Effect applied to the image. + uint32_t imageWidth; ///< The width in pixels of the image. + uint32_t imageHeight; ///< The height in pixels of the image. + uint32_t cropWidth; ///< Desired crop width of the image from the top left corner. + uint32_t cropHeight; ///< Desired crop height of the image from the top left corner. + uint32_t viewWidth; ///< The width of the viewport. 
+ uint32_t viewHeight; ///< The height of the viewport. + float aspectRatio; ///< Desired aspect ratio of the image. The renderer will letterbox + ///< the image to maintain the specified aspect ratio. + float scale; ///< Multiplier applied to the dimensions of the image, producing an + ///< minimized or magnified image. This can be set to 0 + ///< if unnecessary. } MTY_RenderDesc; /// @brief A point with an `x` and `y` coordinate. diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index a9adea218..2779f1c60 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -10,6 +10,7 @@ GFX_CTX_PROTOTYPES(_d3d11_) #define COBJMACROS #include #include +#include #define DXGI_FATAL(e) ( \ (e) == DXGI_ERROR_DEVICE_REMOVED || \ @@ -29,10 +30,15 @@ struct d3d11_ctx { uint32_t width; uint32_t height; MTY_Renderer *renderer; + DXGI_FORMAT format; + DXGI_FORMAT format_new; + DXGI_COLOR_SPACE_TYPE colorspace; + DXGI_COLOR_SPACE_TYPE colorspace_new; ID3D11Device *device; ID3D11DeviceContext *context; ID3D11Texture2D *back_buffer; IDXGISwapChain2 *swap_chain2; + IDXGISwapChain3 *swap_chain3; HANDLE waitable; }; @@ -45,6 +51,57 @@ static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t *height = rect.bottom - rect.top; } +static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorFormat format, MTY_ColorSpace colorspace, DXGI_FORMAT *format_out, DXGI_COLOR_SPACE_TYPE *colorspace_out) +{ + DXGI_FORMAT format_new = DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_COLOR_SPACE_TYPE colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + + // Use the last known value if unspecified + if (format == MTY_COLOR_FORMAT_UNKNOWN) { + format_new = ctx->format; + } + if (colorspace == MTY_COLOR_SPACE_UNKNOWN) { + colorspace_new = ctx->colorspace; + } + + switch (format) { + case MTY_COLOR_FORMAT_RGBA16F: + format_new = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case MTY_COLOR_FORMAT_RGB10A2: + format_new = 
DXGI_FORMAT_R10G10B10A2_UNORM; + break; + } + + switch (colorspace) { + case MTY_COLOR_SPACE_SRGB: + colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + break; + case MTY_COLOR_SPACE_SCRGB_LINEAR: + colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709; + break; + case MTY_COLOR_SPACE_HDR10: + colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; + break; + } + + // Ensure that the format and colorspace are a valid pairing + // TODO: An improvement would be to log an error as well instead of only forcing the values + switch (colorspace_new) { + case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: + format_new = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: + format_new = DXGI_FORMAT_R10G10B10A2_UNORM; + break; + default: + break; + } + + *format_out = format_new; + *colorspace_out = colorspace_new; +} + static void d3d11_ctx_free(struct d3d11_ctx *ctx) { if (ctx->back_buffer) @@ -56,6 +113,9 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) if (ctx->swap_chain2) IDXGISwapChain2_Release(ctx->swap_chain2); + if (ctx->swap_chain3) + IDXGISwapChain3_Release(ctx->swap_chain3); + if (ctx->context) ID3D11DeviceContext_Release(ctx->context); @@ -65,6 +125,7 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) ctx->back_buffer = NULL; ctx->waitable = NULL; ctx->swap_chain2 = NULL; + ctx->swap_chain3 = NULL; ctx->context = NULL; ctx->device = NULL; } @@ -77,8 +138,12 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) IDXGIFactory2 *factory2 = NULL; IDXGISwapChain1 *swap_chain1 = NULL; + ctx->format = MTY_COLOR_FORMAT_BGRA; + ctx->colorspace = MTY_COLOR_SPACE_SRGB; + DXGI_SWAP_CHAIN_DESC1 sd = {0}; - sd.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; // TODO: Need to make this an input parameter + sd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // TODO: Sync this with ctx->format initial value + // sd.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; // TODO: Need to make this an input parameter sd.BufferUsage = 
DXGI_USAGE_RENDER_TARGET_OUTPUT; sd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; sd.SampleDesc.Count = 1; @@ -129,6 +194,12 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) goto except; } + e = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain3, &ctx->swap_chain3); + if (e != S_OK) { + MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e); + goto except; + } + ctx->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(ctx->swap_chain2); if (!ctx->waitable) { e = !S_OK; @@ -227,7 +298,7 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) if (ctx->width != width || ctx->height != height) { HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, - DXGI_FORMAT_UNKNOWN, D3D11_SWFLAGS); + DXGI_FORMAT_UNKNOWN, D3D11_SWFLAGS); // unknown format will resize without changing the existing format if (e == S_OK) { ctx->width = width; @@ -240,6 +311,27 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) d3d11_ctx_init(ctx); } } + + DXGI_FORMAT format = ctx->format_new; + DXGI_COLOR_SPACE_TYPE colorspace = ctx->colorspace_new; + + if (ctx->format != format || ctx->colorspace != colorspace) { + HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, + format, D3D11_SWFLAGS); + // TODO: Need to query for display capabilities via CheckColorSpaceSupport before calling SetColorSpace1 + e = IDXGISwapChain3_SetColorSpace1(ctx->swap_chain3, colorspace); + + if (e == S_OK) { + ctx->format = format; + ctx->colorspace = colorspace; + } + + if (DXGI_FATAL(e)) { + MTY_Log("'IDXGISwapChain2_ResizeBuffers' failed with HRESULT 0x%X", e); + d3d11_ctx_free(ctx); + d3d11_ctx_init(ctx); + } + } } MTY_Surface *mty_d3d11_ctx_get_surface(struct gfx_ctx *gfx_ctx) @@ -289,6 +381,12 @@ void mty_d3d11_ctx_draw_quad(struct gfx_ctx *gfx_ctx, const void *image, const M { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_COLOR_SPACE_TYPE colorspace = 
DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + mty_validate_format_colorspace(ctx, desc->format, desc->colorspace, &format, &colorspace); + ctx->format_new = format; + ctx->colorspace_new = colorspace; + mty_d3d11_ctx_get_surface(gfx_ctx); if (ctx->back_buffer) { @@ -305,6 +403,8 @@ void mty_d3d11_ctx_draw_ui(struct gfx_ctx *gfx_ctx, const MTY_DrawData *dd) { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + // TODO: Always render the UI in SDR and composite it on top of the quad + mty_d3d11_ctx_get_surface(gfx_ctx); if (ctx->back_buffer) diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 59eee73ed..13c7aa7b9 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -27,8 +27,8 @@ struct d3d11_psvars { uint32_t filter; uint32_t effect; uint32_t format; - uint32_t rotation; - uint32_t __pad[1]; // Constant buffers must be in increments of 16 bytes + uint32_t colorspace; + uint32_t rotation; // Constant buffers must be in increments of 16 bytes }; struct d3d11_res { @@ -425,6 +425,7 @@ bool mty_d3d11_render(struct gfx *gfx, MTY_Device *device, MTY_Context *context, cb.filter = desc->filter; cb.effect = desc->effect; cb.format = ctx->format; + cb.colorspace = desc->colorspace; cb.rotation = desc->rotation; D3D11_MAPPED_SUBRESOURCE res = {0}; diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index e481b8ed8..fe3748aa7 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -16,6 +16,7 @@ cbuffer VS_CONSTANT_BUFFER : register(b0) { uint filter; uint effect; uint format; + uint colorspace; uint rotation; uint __pad[1]; }; @@ -119,6 +120,22 @@ float4 main(VS_OUTPUT input) : SV_TARGET rgba = yuv_to_rgba(y, u, v); + // RGBA16F, scRGB linear + } else if (format == 9 && colorspace == 2) { + rgba = tex0.Sample(ss, uv); + + // RGB10A2, either sRGB or HDR10 + } else if (format == 10 && (colorspace == 1 || colorspace == 3)) { + if (colorspace == 1) { + // sRGB - do 
nothing different + rgba = tex0.Sample(ss, uv); + } else { + // HDR10 + // TODO: Do we need to do a st 2084 encoding/decoding? + // TODO: Do we need to do any tonemapping? + rgba = tex0.Sample(ss, uv); + } + // BGRA } else { rgba = tex0.Sample(ss, uv); diff --git a/src/windows/gfx/shaders/d3d11/psui.ps4 b/src/windows/gfx/shaders/d3d11/psui.ps4 index aa69b1e6a..e7094d6bc 100644 --- a/src/windows/gfx/shaders/d3d11/psui.ps4 +++ b/src/windows/gfx/shaders/d3d11/psui.ps4 @@ -15,5 +15,6 @@ Texture2D texture0; float4 main(PS_INPUT input) : SV_Target { + // TODO: We will probably need to convert the sRGB UI into linear space as part of compositing UI on top of HDR frame return input.col * texture0.Sample(sampler0, input.uv); } From dde07340ab65ef0942eb97305402792a7b9f3d56 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Sat, 12 Mar 2022 00:40:58 -0500 Subject: [PATCH 03/28] First pass at getting the shader right for YUV to HDR RGB. --- src/windows/gfx/shaders/d3d11/ps.ps4 | 53 ++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index fe3748aa7..fbe46bd6c 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -43,6 +43,41 @@ inline float4 yuv_to_rgba(float y, float u, float v) return float4(r, g, b, 1.0); } +inline float4 yuv_to_rgba_rec2020(float y, float u, float v) +{ + // Using "RGB to YCbCr color conversion for UHDTV" (ITU-R BT.2020) + + // y = (y - 0.0625) * 1.164; + // u = u - 0.5; + // v = v - 0.5; + + // Thanks to colour-science (TODO: Add proper link) + const float Y_min = 0.06256109f; + const float Y_max = 0.91886608f; + const float Y_minmax_dt = Y_max - Y_min; + const float C_min = 0.06256109f; + const float C_max = 0.93841642f; + const float C_minmax_2 = (C_min + C_max) * 0.5f; // TODO: Should we just set this to 0.5f, like in yuv_to_rgba? 
+ const float C_minmax_dt = C_max - C_min; + + y = y - Y_min; + u = u - C_minmax_2; + v = v - C_minmax_2; + + y = y * 1.0f / Y_minmax_dt; + u = u * 1.0f / C_minmax_dt; + v = v * 1.0f / C_minmax_dt; + + static const float K_r = 0.2627f; + static const float K_b = 0.0593f; + + float r = y + (2.0f - 2.0f * K_r) * v; + float b = y + (2.0f - 2.0f * K_b) * u; + float g = (y - K_r * r - K_b * b) / (1.0f - K_r - K_b); + + return float4(r, g, b, 1.0); +} + inline void gaussian(uint type, float w, float h, inout float2 uv) { float2 res = float2(w, h); @@ -102,7 +137,11 @@ float4 main(VS_OUTPUT input) : SV_TARGET float u = tex1.Sample(ss, uv).r; float v = tex1.Sample(ss, uv).g; - rgba = yuv_to_rgba(y, u, v); + if (colorspace == 3) { + rgba = yuv_to_rgba_rec2020(y, u, v); + } else { + rgba = yuv_to_rgba(y, u, v); + } // I420, I444 } else if (format == 3 || format == 4) { @@ -110,7 +149,11 @@ float4 main(VS_OUTPUT input) : SV_TARGET float u = tex1.Sample(ss, uv).r; float v = tex2.Sample(ss, uv).r; - rgba = yuv_to_rgba(y, u, v); + if (colorspace == 3) { + rgba = yuv_to_rgba_rec2020(y, u, v); + } else { + rgba = yuv_to_rgba(y, u, v); + } // AYUV } else if (format == 8) { @@ -118,7 +161,11 @@ float4 main(VS_OUTPUT input) : SV_TARGET float u = tex0.Sample(ss, uv).g; float v = tex0.Sample(ss, uv).b; - rgba = yuv_to_rgba(y, u, v); + if (colorspace == 3) { + rgba = yuv_to_rgba_rec2020(y, u, v); + } else { + rgba = yuv_to_rgba(y, u, v); + } // RGBA16F, scRGB linear } else if (format == 9 && colorspace == 2) { From d9774a2a7d30bd38965060a33ba0eafe3067294e Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 14 Mar 2022 14:27:08 -0400 Subject: [PATCH 04/28] Added support for the 10-bit and higher YUV formats. 
--- src/matoya.h | 7 +++-- src/windows/gfx/d3d11.c | 46 ++++++++++++++++++---------- src/windows/gfx/shaders/d3d11/ps.ps4 | 28 ++++++++--------- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 252c24dc7..5ecb3c31f 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -70,8 +70,11 @@ typedef enum { MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. - MTY_COLOR_FORMAT_RGBA16F = 9, ///< 16-bits floating-point precision per channel RGBA. - MTY_COLOR_FORMAT_RGB10A2 = 10, ///< 10-bits per RGB channels, 2-bit alpha. + MTY_COLOR_FORMAT_P016 = 9, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane + ///< followed by a half W/H V plane. Supports 10-bit to 16-bit data. + MTY_COLOR_FORMAT_I444_16 = 10, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 10-bit to 16-bit data. + MTY_COLOR_FORMAT_RGB10A2 = 11, ///< 10-bits per RGB channels, 2-bit alpha. + MTY_COLOR_FORMAT_RGBA16F = 12, ///< 16-bits floating-point precision per channel RGBA. MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, } MTY_ColorFormat; diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 13c7aa7b9..682d1c760 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -280,11 +280,13 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_AYUV: case MTY_COLOR_FORMAT_BGR565: case MTY_COLOR_FORMAT_BGRA5551: + case MTY_COLOR_FORMAT_RGB10A2: case MTY_COLOR_FORMAT_RGBA16F: { DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_BGR565 ? DXGI_FORMAT_B5G6R5_UNORM : desc->format == MTY_COLOR_FORMAT_BGRA5551 ? DXGI_FORMAT_B5G5R5A1_UNORM : + desc->format == MTY_COLOR_FORMAT_RGB10A2 ? DXGI_FORMAT_R10G10B10A2_UNORM : desc->format == MTY_COLOR_FORMAT_RGBA16F ? 
DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; - uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_AYUV) ? 4 : 2; + uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_RGB10A2 || desc->format == MTY_COLOR_FORMAT_AYUV) ? 4 : 2; if (format == DXGI_FORMAT_R16G16B16A16_FLOAT) bpp = 8; @@ -296,47 +298,59 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID if (e != S_OK) return e; break; } - case MTY_COLOR_FORMAT_NV12: { + case MTY_COLOR_FORMAT_NV12: + case MTY_COLOR_FORMAT_P016: { + // See the following resources to understand memory layout of these formats: + // - https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#nv12 + // - https://docs.microsoft.com/en-us/windows/win32/medfound/10-bit-and-16-bit-yuv-video-formats#p016-and-p010 + const bool nv12 = desc->format == MTY_COLOR_FORMAT_NV12; + const uint8_t bpp = nv12 ? 1 : 2; + const DXGI_FORMAT format_y = nv12 ? DXGI_FORMAT_R8_UNORM : DXGI_FORMAT_R8G8_UNORM; + const DXGI_FORMAT format_uv = nv12 ? 
DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + // Y - HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth, desc->cropHeight); + HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format_y, desc->cropWidth, desc->cropHeight); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, 1); + e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, bpp); if (e != S_OK) return e; // UV - e = d3d11_refresh_resource(&ctx->staging[1], device, DXGI_FORMAT_R8G8_UNORM, desc->cropWidth / 2, desc->cropHeight / 2); + e = d3d11_refresh_resource(&ctx->staging[1], device, format_uv, desc->cropWidth / 2, desc->cropHeight / 2); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[1].resource, (uint8_t *) image + desc->imageWidth * desc->imageHeight, desc->cropWidth / 2, desc->cropHeight / 2, desc->imageWidth / 2, 2); + e = d3d11_crop_copy(context, ctx->staging[1].resource, (uint8_t *) image + desc->imageWidth * desc->imageHeight, desc->cropWidth / 2, desc->cropHeight / 2, desc->imageWidth / 2, bpp * 2); if (e != S_OK) return e; break; } case MTY_COLOR_FORMAT_I420: - case MTY_COLOR_FORMAT_I444: { - uint32_t div = desc->format == MTY_COLOR_FORMAT_I420 ? 2 : 1; + case MTY_COLOR_FORMAT_I444: + case MTY_COLOR_FORMAT_I444_16: { + const uint32_t div = desc->format == MTY_COLOR_FORMAT_I420 ? 2 : 1; + const DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_I444_16 ? DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R8_UNORM; + const uint8_t bpp = desc->format == MTY_COLOR_FORMAT_I444_16 ? 
2 : 1; // Y - HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth, desc->cropHeight); + HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format, desc->cropWidth, desc->cropHeight); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, 1); + e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, bpp); if (e != S_OK) return e; // U - uint8_t *p = (uint8_t *) image + desc->imageWidth * desc->imageHeight; - e = d3d11_refresh_resource(&ctx->staging[1], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth / div, desc->cropHeight / div); + uint8_t *p = (uint8_t *) image + desc->imageWidth * desc->imageHeight * bpp; + e = d3d11_refresh_resource(&ctx->staging[1], device, format, desc->cropWidth / div, desc->cropHeight / div); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[1].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, 1); + e = d3d11_crop_copy(context, ctx->staging[1].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, bpp); if (e != S_OK) return e; // V - p += (desc->imageWidth / div) * (desc->imageHeight / div); - e = d3d11_refresh_resource(&ctx->staging[2], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth / div, desc->cropHeight / div); + p += (desc->imageWidth / div) * (desc->imageHeight / div) * bpp; + e = d3d11_refresh_resource(&ctx->staging[2], device, format, desc->cropWidth / div, desc->cropHeight / div); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[2].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, 1); + e = d3d11_crop_copy(context, ctx->staging[2].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, bpp); if (e != S_OK) return e; break; } diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 
b/src/windows/gfx/shaders/d3d11/ps.ps4 index fbe46bd6c..d10d3b1bb 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -131,25 +131,25 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (filter == 3 || filter == 4) gaussian(filter, width, height, uv); - // NV12, NV16 - if (format == 2 || format == 5) { + // NV12, NV16, P016 + if (format == 2 || format == 5 || format == 9) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex1.Sample(ss, uv).g; - if (colorspace == 3) { + if (format == 9 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); } else { rgba = yuv_to_rgba(y, u, v); } - // I420, I444 - } else if (format == 3 || format == 4) { + // I420, I444, I444_16 + } else if (format == 3 || format == 4 || format == 10) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex2.Sample(ss, uv).r; - if (colorspace == 3) { + if (format == 10 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); } else { rgba = yuv_to_rgba(y, u, v); @@ -161,18 +161,10 @@ float4 main(VS_OUTPUT input) : SV_TARGET float u = tex0.Sample(ss, uv).g; float v = tex0.Sample(ss, uv).b; - if (colorspace == 3) { - rgba = yuv_to_rgba_rec2020(y, u, v); - } else { - rgba = yuv_to_rgba(y, u, v); - } - - // RGBA16F, scRGB linear - } else if (format == 9 && colorspace == 2) { - rgba = tex0.Sample(ss, uv); + rgba = yuv_to_rgba(y, u, v); // RGB10A2, either sRGB or HDR10 - } else if (format == 10 && (colorspace == 1 || colorspace == 3)) { + } else if (format == 11 && (colorspace == 1 || colorspace == 3)) { if (colorspace == 1) { // sRGB - do nothing different rgba = tex0.Sample(ss, uv); @@ -183,6 +175,10 @@ float4 main(VS_OUTPUT input) : SV_TARGET rgba = tex0.Sample(ss, uv); } + // RGBA16F, scRGB linear + } else if (format == 12 && colorspace == 2) { + rgba = tex0.Sample(ss, uv); + // BGRA } else { rgba = tex0.Sample(ss, uv); From 727da7fdeea6992c2d43f9531d411b80af751159 Mon Sep 17 00:00:00 2001 From: Daniel 
Vijayakumar Date: Mon, 14 Mar 2022 16:44:02 -0400 Subject: [PATCH 05/28] Fixed bug in 10-bit/16-bit texture format handling. --- src/windows/gfx/d3d11.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 682d1c760..d6fe38b1e 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -305,8 +305,8 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID // - https://docs.microsoft.com/en-us/windows/win32/medfound/10-bit-and-16-bit-yuv-video-formats#p016-and-p010 const bool nv12 = desc->format == MTY_COLOR_FORMAT_NV12; const uint8_t bpp = nv12 ? 1 : 2; - const DXGI_FORMAT format_y = nv12 ? DXGI_FORMAT_R8_UNORM : DXGI_FORMAT_R8G8_UNORM; - const DXGI_FORMAT format_uv = nv12 ? DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + const DXGI_FORMAT format_y = nv12 ? DXGI_FORMAT_R8_UNORM : DXGI_FORMAT_R16_UNORM; + const DXGI_FORMAT format_uv = nv12 ? DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R16G16_UNORM; // Y HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format_y, desc->cropWidth, desc->cropHeight); @@ -319,7 +319,7 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID e = d3d11_refresh_resource(&ctx->staging[1], device, format_uv, desc->cropWidth / 2, desc->cropHeight / 2); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[1].resource, (uint8_t *) image + desc->imageWidth * desc->imageHeight, desc->cropWidth / 2, desc->cropHeight / 2, desc->imageWidth / 2, bpp * 2); + e = d3d11_crop_copy(context, ctx->staging[1].resource, (uint8_t *) image + desc->imageWidth * desc->imageHeight * bpp, desc->cropWidth / 2, desc->cropHeight / 2, desc->imageWidth / 2, bpp * 2); if (e != S_OK) return e; break; } @@ -327,7 +327,7 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_I444: case MTY_COLOR_FORMAT_I444_16: { const uint32_t div = desc->format == 
MTY_COLOR_FORMAT_I420 ? 2 : 1; - const DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_I444_16 ? DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R8_UNORM; + const DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_I444_16 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM; const uint8_t bpp = desc->format == MTY_COLOR_FORMAT_I444_16 ? 2 : 1; // Y From 5848c1ac08c5c6d487879ab8b3e15ffccaa8e208 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Tue, 15 Mar 2022 01:50:58 -0400 Subject: [PATCH 06/28] Cleaned up the HDR yuv-to-rgb math for d3d11/d3d12. --- src/windows/gfx/shaders/d3d11/ps.ps4 | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index d10d3b1bb..c08e7701f 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -46,18 +46,14 @@ inline float4 yuv_to_rgba(float y, float u, float v) inline float4 yuv_to_rgba_rec2020(float y, float u, float v) { // Using "RGB to YCbCr color conversion for UHDTV" (ITU-R BT.2020) + // Thanks to https://github.com/colour-science/colour/blob/c3735e5d0ad67443022ece0b42b575e040eb61d1/colour/models/rgb/ycbcr.py#L472 - // y = (y - 0.0625) * 1.164; - // u = u - 0.5; - // v = v - 0.5; - - // Thanks to colour-science (TODO: Add proper link) const float Y_min = 0.06256109f; const float Y_max = 0.91886608f; const float Y_minmax_dt = Y_max - Y_min; const float C_min = 0.06256109f; const float C_max = 0.93841642f; - const float C_minmax_2 = (C_min + C_max) * 0.5f; // TODO: Should we just set this to 0.5f, like in yuv_to_rgba? 
+ const float C_minmax_2 = (C_min + C_max) * 0.5f; const float C_minmax_dt = C_max - C_min; y = y - Y_min; @@ -68,12 +64,12 @@ inline float4 yuv_to_rgba_rec2020(float y, float u, float v) u = u * 1.0f / C_minmax_dt; v = v * 1.0f / C_minmax_dt; - static const float K_r = 0.2627f; - static const float K_b = 0.0593f; + const float K_r = 0.2627f; + const float K_b = 0.0593f; - float r = y + (2.0f - 2.0f * K_r) * v; - float b = y + (2.0f - 2.0f * K_b) * u; - float g = (y - K_r * r - K_b * b) / (1.0f - K_r - K_b); + const float r = y + (2.0f - 2.0f * K_r) * v; + const float b = y + (2.0f - 2.0f * K_b) * u; + const float g = (y - K_r * r - K_b * b) / (1.0f - K_r - K_b); return float4(r, g, b, 1.0); } From 28dd6546f4a5fe059873375d957ad641078ea4f4 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 21 Mar 2022 18:19:14 -0400 Subject: [PATCH 07/28] The SDR UI is now properly composited on top of HDR video streams. Pending cleanup and refactoring. --- src/matoya.h | 1 + src/windows/gfx/d3d11-ctx.c | 47 +++++++++++++++++------- src/windows/gfx/d3d11-ui.c | 46 +++++++++++++++++++++++ src/windows/gfx/shaders/d3d11/hdr10.hlsl | 46 +++++++++++++++++++++++ src/windows/gfx/shaders/d3d11/ps.ps4 | 36 ++++++++++++++++++ src/windows/gfx/shaders/d3d11/psui.ps4 | 23 +++++++++++- 6 files changed, 183 insertions(+), 16 deletions(-) create mode 100644 src/windows/gfx/shaders/d3d11/hdr10.hlsl diff --git a/src/matoya.h b/src/matoya.h index 5ecb3c31f..257e87b29 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -185,6 +185,7 @@ typedef struct { uint32_t idxTotalLength; ///< Total number of indices in all command lists. uint32_t vtxTotalLength; ///< Total number of vertices in all command lists. bool clear; ///< Surface should be cleared before drawing. + bool hdr; ///< UI in SDR will be composited on top of an HDR quad. } MTY_DrawData; /// @brief Create an MTY_Renderer capable of executing drawing commands. 
diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 2779f1c60..894786461 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -40,6 +40,7 @@ struct d3d11_ctx { IDXGISwapChain2 *swap_chain2; IDXGISwapChain3 *swap_chain3; HANDLE waitable; + bool hdr; }; static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t *height) @@ -142,16 +143,19 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) ctx->colorspace = MTY_COLOR_SPACE_SRGB; DXGI_SWAP_CHAIN_DESC1 sd = {0}; - sd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // TODO: Sync this with ctx->format initial value - // sd.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; // TODO: Need to make this an input parameter + sd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; sd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; sd.SampleDesc.Count = 1; sd.BufferCount = 2; sd.Flags = D3D11_SWFLAGS; - + D3D_FEATURE_LEVEL levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; - HRESULT e = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 0, levels, + UINT flags = 0; + #ifdef DEBUG + flags |= D3D11_CREATE_DEVICE_DEBUG; + #endif + HRESULT e = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, levels, sizeof(levels) / sizeof(D3D_FEATURE_LEVEL), D3D11_SDK_VERSION, &ctx->device, NULL, &ctx->context); if (e != S_OK) { MTY_Log("'D3D11CreateDevice' failed with HRESULT 0x%X", e); @@ -312,21 +316,35 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) } } - DXGI_FORMAT format = ctx->format_new; - DXGI_COLOR_SPACE_TYPE colorspace = ctx->colorspace_new; + bool hdr = ctx->format_new == DXGI_FORMAT_R16G16B16A16_FLOAT || ctx->colorspace_new == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; + + // TODO: CheckColorSpaceSupport is a finnicky query....it can send false even if color space is supported......we probably should not use it OR figure another option to query...worst case scenario, we just ambitiously try to set HDR and if it 
fails, just assume SDR + // // Verify display capabilities + // UINT r_cs = 0; + // HRESULT e_cs = IDXGISwapChain3_CheckColorSpaceSupport(ctx->swap_chain3, DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, &r_cs); // although we are maintaining an scRGB linear back buffer, we need to query for HDR10 (rec2020 + rec2100 PQ) support in order for HDR support to be known + // if (e_cs == S_OK) { + // hdr = hdr && r_cs > 0; + // } else { + // // Can't determine support, so assume there is none + // hdr = false; + // } + + if (ctx->hdr != hdr) { + // If in HDR mode, we keep swap chain in FP16 scRGB linear; otherwise in SDR mode, it's the standard RGBA8 sRGB + DXGI_FORMAT format = hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; + DXGI_COLOR_SPACE_TYPE colorspace = hdr ? DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; - if (ctx->format != format || ctx->colorspace != colorspace) { HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, format, D3D11_SWFLAGS); - // TODO: Need to query for display capabilities via CheckColorSpaceSupport before calling SetColorSpace1 e = IDXGISwapChain3_SetColorSpace1(ctx->swap_chain3, colorspace); if (e == S_OK) { - ctx->format = format; - ctx->colorspace = colorspace; - } + ctx->hdr = hdr; + ctx->format = ctx->format_new; + ctx->colorspace = ctx->colorspace_new; - if (DXGI_FATAL(e)) { + } else if (DXGI_FATAL(e)) { + // TODO: Restructure the code so that the FATAL msg is logged upon EVERY update to e. 
MTY_Log("'IDXGISwapChain2_ResizeBuffers' failed with HRESULT 0x%X", e); d3d11_ctx_free(ctx); d3d11_ctx_init(ctx); @@ -403,13 +421,14 @@ void mty_d3d11_ctx_draw_ui(struct gfx_ctx *gfx_ctx, const MTY_DrawData *dd) { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; - // TODO: Always render the UI in SDR and composite it on top of the quad + MTY_DrawData dd_mutated = *dd; + dd_mutated.hdr = ctx->hdr; // TODO: Flawed....ideally this comes from the caller so we can reset mty_d3d11_ctx_get_surface(gfx_ctx); if (ctx->back_buffer) MTY_RendererDrawUI(ctx->renderer, MTY_GFX_D3D11, (MTY_Device *) ctx->device, - (MTY_Context *) ctx->context, dd, (MTY_Surface *) ctx->back_buffer); + (MTY_Context *) ctx->context, &dd_mutated, (MTY_Surface *) ctx->back_buffer); } bool mty_d3d11_ctx_set_ui_texture(struct gfx_ctx *gfx_ctx, uint32_t id, const void *rgba, diff --git a/src/windows/gfx/d3d11-ui.c b/src/windows/gfx/d3d11-ui.c index 0e9f89bf1..57b8eb647 100644 --- a/src/windows/gfx/d3d11-ui.c +++ b/src/windows/gfx/d3d11-ui.c @@ -32,6 +32,8 @@ struct d3d11_ui { ID3D11InputLayout *il; ID3D11Buffer *cb; ID3D11Resource *cb_res; + ID3D11Buffer *cbps; + ID3D11Resource *cbps_res; ID3D11PixelShader *ps; ID3D11SamplerState *sampler; ID3D11RasterizerState *rs; @@ -43,6 +45,12 @@ struct d3d11_ui_cb { float proj[4][4]; }; +struct d3d11_ui_cbps { + uint32_t hdr; + float hdr_brighten_factor; + float __pad[2]; // must align to 16 bytes +}; + struct gfx_ui *mty_d3d11_ui_create(MTY_Device *device) { struct d3d11_ui *ctx = MTY_Alloc(1, sizeof(struct d3d11_ui)); @@ -91,6 +99,24 @@ struct gfx_ui *mty_d3d11_ui_create(MTY_Device *device) goto except; } + // Pre create a constant buffer used for storing the pixel shader data + D3D11_BUFFER_DESC desc_cbps = {0}; + desc_cbps.ByteWidth = sizeof(struct d3d11_ui_cbps); + desc_cbps.Usage = D3D11_USAGE_DYNAMIC; + desc_cbps.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc_cbps.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + e = ID3D11Device_CreateBuffer(_device, 
&desc_cbps, NULL, &ctx->cbps); + if (e != S_OK) { + MTY_Log("'ID3D11Device_CreateBuffer' failed with HRESULT 0x%X", e); + goto except; + } + + e = ID3D11Buffer_QueryInterface(ctx->cbps, &IID_ID3D11Resource, &ctx->cbps_res); + if (e != S_OK) { + MTY_Log("'ID3D11Buffer_QueryInterface' failed with HRESULT 0x%X", e); + goto except; + } + // Blend state D3D11_BLEND_DESC bdesc = {0}; bdesc.AlphaToCoverageEnable = false; @@ -277,6 +303,19 @@ bool mty_d3d11_ui_render(struct gfx_ui *gfx_ui, MTY_Device *device, MTY_Context memcpy(&cb->proj, proj, sizeof(proj)); ID3D11DeviceContext_Unmap(_context, ctx->cb_res, 0); + // Update pixel shader constant buffer data + struct d3d11_ui_cbps cbps = {0}; + cbps.hdr = (uint32_t) dd->hdr; + cbps.hdr_brighten_factor = 2.5f; // XXX: this is something that we should allow the user to configure via client settings + + D3D11_MAPPED_SUBRESOURCE cbps_map = {0}; + e = ID3D11DeviceContext_Map(_context, ctx->cbps_res, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbps_map); + if (e != S_OK) + goto except; + + memcpy(cbps_map.pData, &cbps, sizeof(struct d3d11_ui_cbps)); + ID3D11DeviceContext_Unmap(_context, ctx->cbps_res, 0); + // Set render target (wraps the texture) if (_dest) { e = ID3D11Texture2D_QueryInterface(_dest, &IID_ID3D11Resource, &tex_res); @@ -317,6 +356,7 @@ bool mty_d3d11_ui_render(struct gfx_ui *gfx_ui, MTY_Device *device, MTY_Context ID3D11DeviceContext_VSSetShader(_context, ctx->vs, NULL, 0); ID3D11DeviceContext_VSSetConstantBuffers(_context, 0, 1, &ctx->cb); ID3D11DeviceContext_PSSetShader(_context, ctx->ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(_context, 0, 1, &ctx->cbps); ID3D11DeviceContext_PSSetSamplers(_context, 0, 1, &ctx->sampler); const float blend_factor[4] = {0.0f, 0.0f, 0.0f, 0.0f}; @@ -480,6 +520,12 @@ void mty_d3d11_ui_destroy(struct gfx_ui **gfx_ui) if (ctx->cb) ID3D11Buffer_Release(ctx->cb); + if (ctx->cbps_res) + ID3D11Resource_Release(ctx->cbps_res); + + if (ctx->cbps) + ID3D11Buffer_Release(ctx->cbps); + if 
(ctx->il) ID3D11InputLayout_Release(ctx->il); diff --git a/src/windows/gfx/shaders/d3d11/hdr10.hlsl b/src/windows/gfx/shaders/d3d11/hdr10.hlsl new file mode 100644 index 000000000..546b680cd --- /dev/null +++ b/src/windows/gfx/shaders/d3d11/hdr10.hlsl @@ -0,0 +1,46 @@ +float spow(float x, float p) +{ + return sign(x) * pow(abs(x), p); +} + +float3 spow3(float3 v, float p) +{ + return float3(spow(v.x, p), spow(v.y, p), spow(v.z, p)); +} + +static const float PQ_m_1 = 2610.0f / 4096.0f / 4.0f; +static const float PQ_m_1_d = 1.0f / PQ_m_1; +static const float PQ_m_2 = 2523.0f / 4096.0f * 128.0f; +static const float PQ_m_2_d = 1.0f / PQ_m_2; +static const float PQ_c_1 = 3424.0f / 4096.0f; +static const float PQ_c_2 = 2413.0f / 4096.0f * 32.0f; +static const float PQ_c_3 = 2392.0f / 4096.0f * 32.0f; + +static const float HDR10_MAX_NITS = 10000.0f; + +float3 rec2020_pq_to_rec2020_linear(float3 color, float sdr_max_nits) +{ + // Apply the PQ EOTF (SMPTE ST 2084-2014) in order to linearize it + // Courtesy of https://github.com/colour-science/colour/blob/38782ac059e8ddd91939f3432bf06811c16667f0/colour/models/rgb/transfer_functions/st_2084.py#L126 + + float3 V_p = spow3(color, PQ_m_2_d); + + float3 n = max(0, V_p - PQ_c_1); + + float3 L = spow3(n / (PQ_c_2 - PQ_c_3 * V_p), PQ_m_1_d); + float3 C = L * HDR10_MAX_NITS / sdr_max_nits; + + return C; +} + +float3 rec2020_linear_to_rec2020_pq(float3 color, float sdr_max_nits) +{ + // Apply the inverse of the PQ EOTF (SMPTE ST 2084-2014) in order to encode the signal as PQ + // Courtesy of https://github.com/colour-science/colour/blob/38782ac059e8ddd91939f3432bf06811c16667f0/colour/models/rgb/transfer_functions/st_2084.py#L56 + + float3 Y_p = spow3(saturate(color / HDR10_MAX_NITS) * sdr_max_nits, PQ_m_1); + + float3 N = spow3((PQ_c_1 + PQ_c_2 * Y_p) / (PQ_c_3 * Y_p + 1.0f), PQ_m_2); + + return N; +} diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index c08e7701f..e93a4b244 100644 --- 
a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -4,6 +4,8 @@ // If a copy of the MIT License was not distributed with this file, // You can obtain one at https://spdx.org/licenses/MIT.html. +#include "hdr10.hlsl" + struct VS_OUTPUT { float4 position : SV_POSITION; float2 texcoord : TEXCOORD; @@ -74,6 +76,36 @@ inline float4 yuv_to_rgba_rec2020(float y, float u, float v) return float4(r, g, b, 1.0); } +static const float SDR_MAX_NITS = 80.0f; + +// This is the precomputed matrix that transforms linear rec2020 to ACES to linear sRGB/rec709. +// Computed as B * A, where +// A = [rec2020 => XYZ => D65_2_D60 => AP1 => RRT_SAT] +// 0.9411843241 0.04576699764 0.00553454759 +// 0.00737755958 0.98266607517 0.00244228163 +// 0.00989047793 0.05050263667 0.93209270519 +// and B = [ODT_SAT => XYZ => D60_2_D65 => sRGB] +// 1.60475 -0.53108 -0.07367 +// -0.10208 1.10813 -0.00605 +// -0.00327 -0.07276 1.07602 +static const float3x3 REC2020_TO_SRGB = +{ + {1.505718838, -0.452150239, -0.061082751}, + {-0.087960638, 1.083944322, -0.003497762}, + {0.007027888, -0.017306595, 1.002754594} +}; + +inline float4 rec2020_pq_to_scrgb_linear(float4 rgba) +{ + float3 color = rgba.rgb; + + color = rec2020_pq_to_rec2020_linear(color, SDR_MAX_NITS); // the HDR10 frame has already been encoded using the ST2084 PQ curve, so we need to decode it in order to get back linear colors + color = mul(REC2020_TO_SRGB, color); + color = clamp(color, -0.5f, 7.4999f); // range of scRGB according to https://www.color.org/chardata/rgb/scrgb.xalter + + return float4(color, rgba.a); +} + inline void gaussian(uint type, float w, float h, inout float2 uv) { float2 res = float2(w, h); @@ -135,6 +167,8 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 9 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); + rgba = rec2020_pq_to_scrgb_linear(rgba); + } else { rgba = yuv_to_rgba(y, u, v); } @@ -147,6 +181,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 10 && 
colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); + rgba = rec2020_pq_to_scrgb_linear(rgba); } else { rgba = yuv_to_rgba(y, u, v); } @@ -159,6 +194,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET rgba = yuv_to_rgba(y, u, v); + // TODO: This section is a bit obsolete I think....re-evaluate if its needed // RGB10A2, either sRGB or HDR10 } else if (format == 11 && (colorspace == 1 || colorspace == 3)) { if (colorspace == 1) { diff --git a/src/windows/gfx/shaders/d3d11/psui.ps4 b/src/windows/gfx/shaders/d3d11/psui.ps4 index e7094d6bc..e84a08db1 100644 --- a/src/windows/gfx/shaders/d3d11/psui.ps4 +++ b/src/windows/gfx/shaders/d3d11/psui.ps4 @@ -10,11 +10,30 @@ struct PS_INPUT { float2 uv : TEXCOORD0; }; +cbuffer VS_CONSTANT_BUFFER : register(b0) { + uint hdr; + float hdr_brighten_factor; +}; + sampler sampler0; Texture2D texture0; +float3 srgb_to_linear(float3 color) +{ + // Fast approximation of sRGB's transfer function + return pow(abs(saturate(color)), 2.2f); +} + float4 main(PS_INPUT input) : SV_Target { - // TODO: We will probably need to convert the sRGB UI into linear space as part of compositing UI on top of HDR frame - return input.col * texture0.Sample(sampler0, input.uv); + float4 ui = input.col * texture0.Sample(sampler0, input.uv); + + if (hdr) { + float3 ui_rgb = ui.rgb; + ui_rgb = srgb_to_linear(ui_rgb); // UI texture is encoded non-linearly in sRGB whereas the render target is in scRGB linear, so we need to linearize the UI + ui_rgb *= hdr_brighten_factor; // 1.0 in scRGB is 80 nits which is the reference SDR luminance but most SDR displays will actually render 1.0 at around 200-300 nits for improved viewing; we mimic this by brightening the UI texture by a configurable constant + ui.rgb = ui_rgb; + } + + return ui; } From aff660520c371869b1b85dc3715a8b6a435a2ab9 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Tue, 22 Mar 2022 13:57:26 -0400 Subject: [PATCH 08/28] Added HDR metadata support for rendering quads. 
--- src/matoya.h | 14 ++++++++++++++ src/windows/gfx/d3d11-ctx.c | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/matoya.h b/src/matoya.h index 257e87b29..4109480a7 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -113,6 +113,18 @@ typedef enum { MTY_ROTATION_MAKE_32 = INT32_MAX, } MTY_Rotation; +/// @brief HDR metadata associated with an image being rendered. +typedef struct { + float color_primary_red[2]; ///< xy coordinates for the red primary of the image's color space according to the CIE 1931 color space chromaticity diagram. + float color_primary_green[2]; ///< xy coordinates for the green primary of the image's color space according to the CIE 1931 color space chromaticity diagram. + float color_primary_blue[2]; ///< xy coordinates for the blue primary of the image's color space according to the CIE 1931 color space chromaticity diagram. + float white_point[2]; ///< xy coordinates for the white point of the image's color space according to the CIE 1931 color space chromaticity diagram. + float min_luminance; ///< Min luminance supported by the display that is the source of the image. + float max_luminance; ///< Max luminance supported by the display that is the source of the image. + float max_content_light_level; ///< MaxCLL. This is the nit value of the brightest possible pixel that could ever occur in an image. If unknown, you can set it to max_luminance. + float max_frame_average_light_level; ///< MaxFALL. This is the highest nit value that an image's average luminance is expected to have. If unknown, you can set it to MaxCLL. +} MTY_HDRDesc; + /// @brief Description of a render operation. typedef struct { MTY_ColorFormat format; ///< The color format of a raw image. @@ -131,6 +143,8 @@ typedef struct { float scale; ///< Multiplier applied to the dimensions of the image, producing an ///< minimized or magnified image. This can be set to 0 ///< if unnecessary. 
+ bool hdrDescSpecified; ///< Is HDR metadata provided. Only relevant if format + colorspace indicate an HDR image. + MTY_HDRDesc hdrDesc; ///< HDR metadata for the image. Only relevant if format + colorspace indicate an HDR image. } MTY_RenderDesc; /// @brief A point with an `x` and `y` coordinate. diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 894786461..adda91eb4 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -11,6 +11,7 @@ GFX_CTX_PROTOTYPES(_d3d11_) #include #include #include +#include #define DXGI_FATAL(e) ( \ (e) == DXGI_ERROR_DEVICE_REMOVED || \ @@ -39,8 +40,10 @@ struct d3d11_ctx { ID3D11Texture2D *back_buffer; IDXGISwapChain2 *swap_chain2; IDXGISwapChain3 *swap_chain3; + IDXGISwapChain4 *swap_chain4; HANDLE waitable; bool hdr; + MTY_HDRDesc hdr_desc; }; static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t *height) @@ -117,6 +120,9 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) if (ctx->swap_chain3) IDXGISwapChain3_Release(ctx->swap_chain3); + if (ctx->swap_chain4) + IDXGISwapChain4_Release(ctx->swap_chain4); + if (ctx->context) ID3D11DeviceContext_Release(ctx->context); @@ -127,6 +133,7 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) ctx->waitable = NULL; ctx->swap_chain2 = NULL; ctx->swap_chain3 = NULL; + ctx->swap_chain4 = NULL; ctx->context = NULL; ctx->device = NULL; } @@ -204,6 +211,12 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) goto except; } + e = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain4, &ctx->swap_chain4); + if (e != S_OK) { + MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e); + goto except; + } + ctx->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(ctx->swap_chain2); if (!ctx->waitable) { e = !S_OK; @@ -350,6 +363,28 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) d3d11_ctx_init(ctx); } } + + if (ctx->hdr) { + // Update to the latest known HDR metadata + DXGI_HDR_METADATA_HDR10 
hdr_desc = {0}; + hdr_desc.RedPrimary[0] = (UINT16) (ctx->hdr_desc.color_primary_red[0] * 50000); // primaries and white point are normalized to 50000 + hdr_desc.RedPrimary[1] = (UINT16) (ctx->hdr_desc.color_primary_red[1] * 50000); + hdr_desc.GreenPrimary[0] = (UINT16) (ctx->hdr_desc.color_primary_green[0] * 50000); + hdr_desc.GreenPrimary[1] = (UINT16) (ctx->hdr_desc.color_primary_green[1] * 50000); + hdr_desc.BluePrimary[0] = (UINT16) (ctx->hdr_desc.color_primary_blue[0] * 50000); + hdr_desc.BluePrimary[1] = (UINT16) (ctx->hdr_desc.color_primary_blue[1] * 50000); + hdr_desc.WhitePoint[0] = (UINT16) (ctx->hdr_desc.white_point[0] * 50000); + hdr_desc.WhitePoint[1] = (UINT16) (ctx->hdr_desc.white_point[1] * 50000); + hdr_desc.MinMasteringLuminance = (UINT) ctx->hdr_desc.min_luminance * 10000; // MinMasteringLuminance is specified as 1/10000th of a nit + hdr_desc.MaxMasteringLuminance = (UINT) ctx->hdr_desc.max_luminance; + hdr_desc.MaxContentLightLevel = (UINT16) ctx->hdr_desc.max_content_light_level; + hdr_desc.MaxFrameAverageLightLevel = (UINT16) ctx->hdr_desc.max_frame_average_light_level; + + HRESULT e = IDXGISwapChain4_SetHDRMetaData(ctx->swap_chain4, DXGI_HDR_METADATA_TYPE_HDR10, sizeof(hdr_desc), &hdr_desc); + if (e != S_OK) { + MTY_Log("Unable to set HDR metadata: 'IDXGISwapChain4_SetHDRMetaData' failed with HRESULT 0x%X", e); + } + } } MTY_Surface *mty_d3d11_ctx_get_surface(struct gfx_ctx *gfx_ctx) @@ -405,6 +440,9 @@ void mty_d3d11_ctx_draw_quad(struct gfx_ctx *gfx_ctx, const void *image, const M ctx->format_new = format; ctx->colorspace_new = colorspace; + if (desc->hdrDescSpecified) + ctx->hdr_desc = desc->hdrDesc; + mty_d3d11_ctx_get_surface(gfx_ctx); if (ctx->back_buffer) { From 44693ca4f0499ca7772466e51e95e17ab3402e35 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Wed, 23 Mar 2022 15:20:14 -0400 Subject: [PATCH 09/28] Cleanup and refactor. 
--- src/windows/gfx/d3d11-ctx.c | 60 +++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index adda91eb4..91a589a3a 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -43,6 +43,7 @@ struct d3d11_ctx { IDXGISwapChain4 *swap_chain4; HANDLE waitable; bool hdr; + bool composite_ui; MTY_HDRDesc hdr_desc; }; @@ -90,14 +91,21 @@ static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorForma } // Ensure that the format and colorspace are a valid pairing - // TODO: An improvement would be to log an error as well instead of only forcing the values switch (colorspace_new) { - case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: - format_new = DXGI_FORMAT_R16G16B16A16_FLOAT; + case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: { + if (format_new != DXGI_FORMAT_R16G16B16A16_FLOAT) { + MTY_Log("Format 0x%X is not meant for colorspace 0x%X. Forcing format to 0x%X.", format_new, DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, DXGI_FORMAT_R16G16B16A16_FLOAT); + format_new = DXGI_FORMAT_R16G16B16A16_FLOAT; + } break; - case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: - format_new = DXGI_FORMAT_R10G10B10A2_UNORM; + } + case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: { + if (format_new != DXGI_FORMAT_R10G10B10A2_UNORM) { + MTY_Log("Format 0x%X is not meant for colorspace 0x%X. 
Forcing format to 0x%X.", format_new, DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, DXGI_FORMAT_R10G10B10A2_UNORM); + format_new = DXGI_FORMAT_R10G10B10A2_UNORM; + } break; + } default: break; } @@ -331,33 +339,26 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) bool hdr = ctx->format_new == DXGI_FORMAT_R16G16B16A16_FLOAT || ctx->colorspace_new == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; - // TODO: CheckColorSpaceSupport is a finnicky query....it can send false even if color space is supported......we probably should not use it OR figure another option to query...worst case scenario, we just ambitiously try to set HDR and if it fails, just assume SDR - // // Verify display capabilities - // UINT r_cs = 0; - // HRESULT e_cs = IDXGISwapChain3_CheckColorSpaceSupport(ctx->swap_chain3, DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, &r_cs); // although we are maintaining an scRGB linear back buffer, we need to query for HDR10 (rec2020 + rec2100 PQ) support in order for HDR support to be known - // if (e_cs == S_OK) { - // hdr = hdr && r_cs > 0; - // } else { - // // Can't determine support, so assume there is none - // hdr = false; - // } - if (ctx->hdr != hdr) { // If in HDR mode, we keep swap chain in FP16 scRGB linear; otherwise in SDR mode, it's the standard RGBA8 sRGB DXGI_FORMAT format = hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; DXGI_COLOR_SPACE_TYPE colorspace = hdr ? 
DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; - HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, - format, D3D11_SWFLAGS); - e = IDXGISwapChain3_SetColorSpace1(ctx->swap_chain3, colorspace); - + HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, format, D3D11_SWFLAGS); if (e == S_OK) { - ctx->hdr = hdr; - ctx->format = ctx->format_new; - ctx->colorspace = ctx->colorspace_new; - + e = IDXGISwapChain3_SetColorSpace1(ctx->swap_chain3, colorspace); + + if (e == S_OK) { + ctx->hdr = hdr; + ctx->format = ctx->format_new; + ctx->colorspace = ctx->colorspace_new; + + } else if (DXGI_FATAL(e)) { + MTY_Log("'IDXGISwapChain3_SetColorSpace1' failed with HRESULT 0x%X", e); + d3d11_ctx_free(ctx); + d3d11_ctx_init(ctx); + } } else if (DXGI_FATAL(e)) { - // TODO: Restructure the code so that the FATAL msg is logged upon EVERY update to e. MTY_Log("'IDXGISwapChain2_ResizeBuffers' failed with HRESULT 0x%X", e); d3d11_ctx_free(ctx); d3d11_ctx_init(ctx); @@ -417,6 +418,8 @@ void mty_d3d11_ctx_present(struct gfx_ctx *gfx_ctx, uint32_t interval) ID3D11Texture2D_Release(ctx->back_buffer); ctx->back_buffer = NULL; + ctx->composite_ui = false; + if (DXGI_FATAL(e)) { MTY_Log("'IDXGISwapChain2_Present' failed with HRESULT 0x%X", e); d3d11_ctx_free(ctx); @@ -452,6 +455,8 @@ void mty_d3d11_ctx_draw_quad(struct gfx_ctx *gfx_ctx, const void *image, const M MTY_RendererDrawQuad(ctx->renderer, MTY_GFX_D3D11, (MTY_Device *) ctx->device, (MTY_Context *) ctx->context, image, &mutated, (MTY_Surface *) ctx->back_buffer); + + ctx->composite_ui = ctx->hdr; } } @@ -460,7 +465,10 @@ void mty_d3d11_ctx_draw_ui(struct gfx_ctx *gfx_ctx, const MTY_DrawData *dd) struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; MTY_DrawData dd_mutated = *dd; - dd_mutated.hdr = ctx->hdr; // TODO: Flawed....ideally this comes from the caller so we can reset + dd_mutated.hdr = ctx->composite_ui; + + ctx->format_new = dd_mutated.hdr ? 
DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM; + ctx->colorspace_new = dd_mutated.hdr ? DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; mty_d3d11_ctx_get_surface(gfx_ctx); From 7e791ab4dfd560f4340aab1d375eae97f4b1ad58 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Fri, 25 Mar 2022 11:13:42 -0400 Subject: [PATCH 10/28] Swapped in the rec2020 -> rec709 transform that OBS uses and just like that the color discoloration issue has gone. I need to study how they derive that matrix. --- src/windows/gfx/shaders/d3d11/ps.ps4 | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index e93a4b244..d68e6a62d 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -94,13 +94,19 @@ static const float3x3 REC2020_TO_SRGB = {-0.087960638, 1.083944322, -0.003497762}, {0.007027888, -0.017306595, 1.002754594} }; +static const float3x3 REC2020_TO_SRGB_obs = +{ + {1.6604910, -0.5876411, -0.0728499}, + {-0.087960638, 1.083944322, -0.0083494}, + {-0.0181508, -0.1005789, 1.1187297 } +}; inline float4 rec2020_pq_to_scrgb_linear(float4 rgba) { float3 color = rgba.rgb; color = rec2020_pq_to_rec2020_linear(color, SDR_MAX_NITS); // the HDR10 frame has already been encoded using the ST2084 PQ curve, so we need to decode it in order to get back linear colors - color = mul(REC2020_TO_SRGB, color); + color = mul(REC2020_TO_SRGB_obs, color); color = clamp(color, -0.5f, 7.4999f); // range of scRGB according to https://www.color.org/chardata/rgb/scrgb.xalter return float4(color, rgba.a); From ea48c6366f8052019206af7844ffdf34e8d4bb56 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Fri, 25 Mar 2022 15:33:18 -0400 Subject: [PATCH 11/28] Added matoya support for querying if a given window supports HDR. Still yet to implement the platform-specific logic though. 
--- src/app.c | 8 ++++++++ src/gfx/mod-ctx.h | 4 +++- src/matoya.h | 6 ++++++ src/windows/gfx/d3d11-ctx.c | 9 +++++++++ src/windows/gfx/d3d12-ctx.c | 7 +++++++ src/windows/gfx/d3d9-ctx.c | 5 +++++ src/windows/gfx/gl-ctx.c | 5 +++++ 7 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/app.c b/src/app.c index 87ce6c30b..6ddf8ec4d 100644 --- a/src/app.c +++ b/src/app.c @@ -129,6 +129,14 @@ bool MTY_WindowSetGFX(MTY_App *app, MTY_Window window, MTY_GFX api, bool vsync) return gfx_ctx ? true : false; } +bool MTY_WindowIsHDRSupported(MTY_App *app, MTY_Window window) +{ + struct gfx_ctx *gfx_ctx = NULL; + MTY_GFX api = mty_window_get_gfx(app, window, &gfx_ctx); + + return api != MTY_GFX_NONE && GFX_CTX_API[api].hdr_supported(gfx_ctx); +} + // Event utility diff --git a/src/gfx/mod-ctx.h b/src/gfx/mod-ctx.h index c5b1865ba..3048fce74 100644 --- a/src/gfx/mod-ctx.h +++ b/src/gfx/mod-ctx.h @@ -31,7 +31,8 @@ struct gfx_ctx; bool wrap(api, set_ui_texture)(struct gfx_ctx *gfx_ctx, uint32_t id, const void *rgba, \ uint32_t width, uint32_t height); \ bool wrap(api, has_ui_texture)(struct gfx_ctx *gfx_ctx, uint32_t id); \ - bool wrap(api, make_current)(struct gfx_ctx *gfx_ctx, bool current); + bool wrap(api, make_current)(struct gfx_ctx *gfx_ctx, bool current); \ + bool wrap(api, hdr_supported)(struct gfx_ctx *gfx_ctx); #define GFX_CTX_PROTOTYPES(api) \ GFX_CTX_DECLARE_API(api, GFX_CTX_PROTO) @@ -49,4 +50,5 @@ struct gfx_ctx; mty##api##ctx_set_ui_texture, \ mty##api##ctx_has_ui_texture, \ mty##api##ctx_make_current, \ + mty##api##ctx_hdr_supported, \ }, diff --git a/src/matoya.h b/src/matoya.h index 4109480a7..0669d6094 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -1287,6 +1287,12 @@ MTY_WindowSetGFX(MTY_App *app, MTY_Window window, MTY_GFX api, bool vsync); MTY_EXPORT MTY_ContextState MTY_WindowGetContextState(MTY_App *app, MTY_Window window); +/// @brief Tells you whether the window can show HDR content or not. +/// @param app The MTY_App. 
+/// @param window An MTY_Window. +MTY_EXPORT bool +MTY_WindowIsHDRSupported(MTY_App *app, MTY_Window window); + /// @brief Get the string representation of a key combination. /// @details This function attempts to use the current locale. /// @param mod Combo modifier. diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 91a589a3a..025b24839 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -497,3 +497,12 @@ bool mty_d3d11_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_d3d11_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + + // TODO: Query the output6 that intersects with the window. But only do this if IsCurrent is false + + return true; +} diff --git a/src/windows/gfx/d3d12-ctx.c b/src/windows/gfx/d3d12-ctx.c index 2807a559c..fe76c6c55 100644 --- a/src/windows/gfx/d3d12-ctx.c +++ b/src/windows/gfx/d3d12-ctx.c @@ -559,3 +559,10 @@ bool mty_d3d12_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_d3d12_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + // TODO + + return false; +} diff --git a/src/windows/gfx/d3d9-ctx.c b/src/windows/gfx/d3d9-ctx.c index a7f97355d..81000fc61 100644 --- a/src/windows/gfx/d3d9-ctx.c +++ b/src/windows/gfx/d3d9-ctx.c @@ -319,3 +319,8 @@ bool mty_d3d9_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_d3d9_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/windows/gfx/gl-ctx.c b/src/windows/gfx/gl-ctx.c index af2a67525..f0cc9350b 100644 --- a/src/windows/gfx/gl-ctx.c +++ b/src/windows/gfx/gl-ctx.c @@ -199,3 +199,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return r; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} \ No newline at end of file From 8cfe32b0b49b2389b273d7864637366f2d92d949 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar 
Date: Fri, 25 Mar 2022 17:16:23 -0400 Subject: [PATCH 12/28] Implemented D3D11 detection of HDR support. --- src/windows/gfx/d3d11-ctx.c | 111 +++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 3 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 025b24839..92fc44815 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -12,6 +12,7 @@ GFX_CTX_PROTOTYPES(_d3d11_) #include #include #include +#include #define DXGI_FATAL(e) ( \ (e) == DXGI_ERROR_DEVICE_REMOVED || \ @@ -41,7 +42,9 @@ struct d3d11_ctx { IDXGISwapChain2 *swap_chain2; IDXGISwapChain3 *swap_chain3; IDXGISwapChain4 *swap_chain4; + IDXGIFactory1 *factory1; HANDLE waitable; + bool hdr_supported; bool hdr; bool composite_ui; MTY_HDRDesc hdr_desc; @@ -114,6 +117,94 @@ static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorForma *colorspace_out = colorspace_new; } +static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) +{ + bool r = false; + + // Courtesy of MSDN https://docs.microsoft.com/en-us/windows/win32/direct3darticles/high-dynamic-range + + // Iterate through the DXGI outputs associated with the DXGI adapter, + // and find the output whose bounds have the greatest overlap with the + // app window (i.e. the output for which the intersection area is the + // greatest). 
+ + // Must create the factory afresh each time, otherwise you'll get a stale value at the end + if (ctx->factory1) { + IDXGIFactory1_Release(ctx->factory1); + ctx->factory1 = NULL; + } + HRESULT e = CreateDXGIFactory1(&IID_IDXGIFactory1, &ctx->factory1); + if (e != S_OK) { + MTY_Log("'CreateDXGIFactory1' failed with HRESULT 0x%X", e); + return r; + } + + // Go through the outputs of each and every adapter + IDXGIOutput *current_output = NULL; + IDXGIOutput *best_output = NULL; + float best_intersect_area = -1; + IDXGIAdapter1 *adapter1 = NULL; + for (UINT j = 0; IDXGIFactory1_EnumAdapters1(ctx->factory1, j, &adapter1) != DXGI_ERROR_NOT_FOUND; j++) { + + for (UINT i = 0; IDXGIAdapter1_EnumOutputs(adapter1, i, &current_output) != DXGI_ERROR_NOT_FOUND; i++) { + + // Get the retangle bounds of the app window + RECT window_bounds = {0}; + GetClientRect(ctx->hwnd, &window_bounds); + LONG ax1 = window_bounds.left; + LONG ay1 = window_bounds.top; + LONG ax2 = window_bounds.right; + LONG ay2 = window_bounds.bottom; + + // Get the rectangle bounds of current output + DXGI_OUTPUT_DESC desc = {0}; + e = IDXGIOutput_GetDesc(current_output, &desc); + if (e != S_OK) { + MTY_Log("'IDXGIOutput_GetDesc' failed with HRESULT 0x%X", e); + } else { + RECT output_bounds = desc.DesktopCoordinates; + LONG bx1 = output_bounds.left; + LONG by1 = output_bounds.top; + LONG bx2 = output_bounds.right; + LONG by2 = output_bounds.bottom; + + // Compute the intersection and see if its the best fit + LONG intersect_area = max(0, min(ax2, bx2) - max(ax1, bx1)) * max(0, min(ay2, by2) - max(ay1, by1)); // courtesy of https://github.com/microsoft/DirectX-Graphics-Samples/blob/c79f839da1bb2db77d2306be5e4e664a5d23a36b/Samples/Desktop/D3D12HDR/src/D3D12HDR.cpp#L1046 + if (intersect_area > best_intersect_area) { + best_output = current_output; + best_intersect_area = (float) intersect_area; // not sure why but the MSDN sample stores this as float when its all integer math...it works though!
+ } else { + IDXGIOutput_Release(current_output); + } + } + } + + IDXGIAdapter1_Release(adapter1); + } + + // Having determined the output (display) upon which the app is primarily being + // rendered, retrieve the HDR capabilities of that display by checking the color space. + IDXGIOutput6 *output6 = NULL; + e = IDXGIOutput_QueryInterface(best_output, &IID_IDXGIOutput6, &output6); + if (e != S_OK) { + MTY_Log("'IDXGIOutput_QueryInterface' failed with HRESULT 0x%X", e); + } else { + DXGI_OUTPUT_DESC1 desc1 = {0}; + e = IDXGIOutput6_GetDesc1(output6, &desc1); + if (e != S_OK) { + MTY_Log("'IDXGIOutput6_GetDesc1' failed with HRESULT 0x%X", e); + } else { + r = desc1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; // this is the canonical check according to MSDN and NVIDIA + } + + IDXGIOutput6_Release(output6); + } + + IDXGIOutput_Release(best_output); + + return r; +} + static void d3d11_ctx_free(struct d3d11_ctx *ctx) { if (ctx->back_buffer) @@ -131,6 +222,9 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) if (ctx->swap_chain4) IDXGISwapChain4_Release(ctx->swap_chain4); + if (ctx->factory1) + IDXGIFactory1_Release(ctx->factory1); + if (ctx->context) ID3D11DeviceContext_Release(ctx->context); @@ -142,6 +236,7 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) ctx->swap_chain2 = NULL; ctx->swap_chain3 = NULL; ctx->swap_chain4 = NULL; + ctx->factory1 = NULL; ctx->context = NULL; ctx->device = NULL; } @@ -201,6 +296,12 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) goto except; } + e = IDXGIFactory2_QueryInterface(factory2, &IID_IDXGIFactory1, &ctx->factory1); + if (e != S_OK) { + MTY_Log("'IDXGIFactory2_QueryInterface' failed with HRESULT 0x%X", e); + goto except; + } + e = IDXGIFactory2_CreateSwapChainForHwnd(factory2, unknown, ctx->hwnd, &sd, NULL, NULL, &swap_chain1); if (e != S_OK) { MTY_Log("'IDXGIFactory2_CreateSwapChainForHwnd' failed with HRESULT 0x%X", e); @@ -249,6 +350,8 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) if (we != 
WAIT_OBJECT_0) MTY_Log("'WaitForSingleObjectEx' failed with error 0x%X", we); + ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + except: if (swap_chain1) @@ -501,8 +604,10 @@ bool mty_d3d11_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) bool mty_d3d11_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + + if (!ctx->factory1 || !IDXGIFactory1_IsCurrent(ctx->factory1)) { + ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + } - // TODO: Query the output6 that intersects with the window. But only do this if IsCurrent is false - - return true; + return ctx->hdr_supported; } From 3717a377893c1c97979da59f0095b6630a47ad48 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Wed, 30 Mar 2022 16:15:05 -0400 Subject: [PATCH 13/28] Got rid of unnecessary clamping to (-0.5, 7.4999) --- src/windows/gfx/shaders/d3d11/ps.ps4 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index d68e6a62d..cb7b98292 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -107,7 +107,6 @@ inline float4 rec2020_pq_to_scrgb_linear(float4 rgba) color = rec2020_pq_to_rec2020_linear(color, SDR_MAX_NITS); // the HDR10 frame has already been encoded using the ST2084 PQ curve, so we need to decode it in order to get back linear colors color = mul(REC2020_TO_SRGB_obs, color); - color = clamp(color, -0.5f, 7.4999f); // range of scRGB according to https://www.color.org/chardata/rgb/scrgb.xalter return float4(color, rgba.a); } From 16f1877282123a217d9104ff472331e840422296 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Tue, 12 Apr 2022 15:31:57 -0400 Subject: [PATCH 14/28] Fixed bug in HDR support detection that was always checking the primary display window bounds. 
--- src/windows/gfx/d3d11-ctx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 92fc44815..a55a9cb62 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -139,6 +139,14 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) return r; } + // Get the retangle bounds of the app window + RECT window_bounds = {0}; + GetWindowRect(ctx->hwnd, &window_bounds); + LONG ax1 = window_bounds.left; + LONG ay1 = window_bounds.top; + LONG ax2 = window_bounds.right; + LONG ay2 = window_bounds.bottom; + // Go through the outputs of each and every adapter IDXGIOutput *current_output = NULL; IDXGIOutput *best_output = NULL; @@ -148,14 +156,6 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) for (UINT i = 0; IDXGIAdapter1_EnumOutputs(adapter1, i, &current_output) != DXGI_ERROR_NOT_FOUND; i++) { - // Get the retangle bounds of the app window - RECT window_bounds = {0}; - GetClientRect(ctx->hwnd, &window_bounds); - LONG ax1 = window_bounds.left; - LONG ay1 = window_bounds.top; - LONG ax2 = window_bounds.right; - LONG ay2 = window_bounds.bottom; - // Get the rectangle bounds of current output DXGI_OUTPUT_DESC desc = {0}; e = IDXGIOutput_GetDesc(current_output, &desc); From 72b5898d4741a3f49529e20f0cc668739b9f5e6d Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Tue, 12 Apr 2022 16:18:04 -0400 Subject: [PATCH 15/28] HDR support detection now works when the parsec app is moved to different monitors. 
--- src/windows/gfx/d3d11-ctx.c | 50 +++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index a55a9cb62..5fa8ac8a8 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -48,6 +48,7 @@ struct d3d11_ctx { bool hdr; bool composite_ui; MTY_HDRDesc hdr_desc; + RECT window_bounds; }; static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t *height) @@ -117,6 +118,25 @@ static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorForma *colorspace_out = colorspace_new; } +static bool d3d11_ctx_refresh_window_bounds(struct d3d11_ctx *ctx) +{ + bool changed = false; + + RECT window_bounds_new = {0}; + GetWindowRect(ctx->hwnd, &window_bounds_new); + + LONG dt_left = window_bounds_new.left - ctx->window_bounds.left; + LONG dt_top = window_bounds_new.top - ctx->window_bounds.top; + LONG dt_right = window_bounds_new.right - ctx->window_bounds.right; + LONG dt_bottom = window_bounds_new.bottom - ctx->window_bounds.bottom; + + changed = labs(dt_left) || labs(dt_top) || labs(dt_right) || labs(dt_bottom); + + ctx->window_bounds = window_bounds_new; + + return changed; +} + static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) { bool r = false; @@ -140,12 +160,10 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) } // Get the retangle bounds of the app window - RECT window_bounds = {0}; - GetWindowRect(ctx->hwnd, &window_bounds); - LONG ax1 = window_bounds.left; - LONG ay1 = window_bounds.top; - LONG ax2 = window_bounds.right; - LONG ay2 = window_bounds.bottom; + const LONG ax1 = ctx->window_bounds.left; + const LONG ay1 = ctx->window_bounds.top; + const LONG ax2 = ctx->window_bounds.right; + const LONG ay2 = ctx->window_bounds.bottom; // Go through the outputs of each and every adapter IDXGIOutput *current_output = NULL; @@ -162,14 +180,14 @@ static bool d3d11_ctx_query_hdr_support(struct 
d3d11_ctx *ctx) if (e != S_OK) { MTY_Log("'IDXGIOutput_GetDesc' failed with HRESULT 0x%X", e); } else { - RECT output_bounds = desc.DesktopCoordinates; - LONG bx1 = output_bounds.left; - LONG by1 = output_bounds.top; - LONG bx2 = output_bounds.right; - LONG by2 = output_bounds.bottom; + const RECT output_bounds = desc.DesktopCoordinates; + const LONG bx1 = output_bounds.left; + const LONG by1 = output_bounds.top; + const LONG bx2 = output_bounds.right; + const LONG by2 = output_bounds.bottom; // Compute the intersection and see if its the best fit - LONG intersect_area = max(0, min(ax2, bx2) - max(ax1, bx1)) * max(0, min(ay2, by2) - max(ay1, by1)); // courtesy of https://github.com/microsoft/DirectX-Graphics-Samples/blob/c79f839da1bb2db77d2306be5e4e664a5d23a36b/Samples/Desktop/D3D12HDR/src/D3D12HDR.cpp#L1046 + const LONG intersect_area = max(0, min(ax2, bx2) - max(ax1, bx1)) * max(0, min(ay2, by2) - max(ay1, by1)); // courtesy of https://github.com/microsoft/DirectX-Graphics-Samples/blob/c79f839da1bb2db77d2306be5e4e664a5d23a36b/Samples/Desktop/D3D12HDR/src/D3D12HDR.cpp#L1046 if (intersect_area > best_intersect_area) { best_output = current_output; best_intersect_area = (float) intersect_area; // not sure why but the MSDN sample stores this as float when its all integer math...it works though! 
@@ -350,6 +368,8 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) if (we != WAIT_OBJECT_0) MTY_Log("'WaitForSingleObjectEx' failed with error 0x%X", we); + d3d11_ctx_refresh_window_bounds(ctx); + ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); except: @@ -605,7 +625,11 @@ bool mty_d3d11_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; - if (!ctx->factory1 || !IDXGIFactory1_IsCurrent(ctx->factory1)) { + if ( + d3d11_ctx_refresh_window_bounds(ctx) // check whether window was moved to another screen + || !ctx->factory1 + || !IDXGIFactory1_IsCurrent(ctx->factory1) // display adapter reset for a variety of reasons + ) { ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); } From b84a9a5b720bd0ce7d8603aa4f74f1e2c1b48b8e Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Thu, 31 Mar 2022 11:28:15 -0400 Subject: [PATCH 16/28] We now keep the HDR window's swapchain in HDR10 format (rec2020 10-bit RGB + ST2084 PQ) instead of FP16 scRGB linear. One less conversion step certainly increases color accuracy. --- src/windows/gfx/d3d11-ctx.c | 6 ++-- src/windows/gfx/d3d11-ui.c | 2 +- src/windows/gfx/shaders/d3d11/hdr10.hlsl | 2 +- src/windows/gfx/shaders/d3d11/ps.ps4 | 36 ------------------------ src/windows/gfx/shaders/d3d11/psui.ps4 | 23 +++++++++++++-- 5 files changed, 26 insertions(+), 43 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 5fa8ac8a8..ae29b8448 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -463,9 +463,9 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) bool hdr = ctx->format_new == DXGI_FORMAT_R16G16B16A16_FLOAT || ctx->colorspace_new == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; if (ctx->hdr != hdr) { - // If in HDR mode, we keep swap chain in FP16 scRGB linear; otherwise in SDR mode, it's the standard RGBA8 sRGB - DXGI_FORMAT format = hdr ? 
DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; - DXGI_COLOR_SPACE_TYPE colorspace = hdr ? DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + // If in HDR mode, we keep swap chain in HDR10 (rec2020 10-bit RGB + ST2084 PQ); otherwise in SDR mode, it's the standard RGBA8 sRGB + DXGI_FORMAT format = hdr ? DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_B8G8R8A8_UNORM; + DXGI_COLOR_SPACE_TYPE colorspace = hdr ? DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, format, D3D11_SWFLAGS); if (e == S_OK) { diff --git a/src/windows/gfx/d3d11-ui.c b/src/windows/gfx/d3d11-ui.c index 57b8eb647..6de127455 100644 --- a/src/windows/gfx/d3d11-ui.c +++ b/src/windows/gfx/d3d11-ui.c @@ -306,7 +306,7 @@ bool mty_d3d11_ui_render(struct gfx_ui *gfx_ui, MTY_Device *device, MTY_Context // Update pixel shader constant buffer data struct d3d11_ui_cbps cbps = {0}; cbps.hdr = (uint32_t) dd->hdr; - cbps.hdr_brighten_factor = 2.5f; // XXX: this is something that we should allow the user to configure via client settings + cbps.hdr_brighten_factor = 3.75f; // XXX: this is something that we should allow the user to configure via client settings D3D11_MAPPED_SUBRESOURCE cbps_map = {0}; e = ID3D11DeviceContext_Map(_context, ctx->cbps_res, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbps_map); diff --git a/src/windows/gfx/shaders/d3d11/hdr10.hlsl b/src/windows/gfx/shaders/d3d11/hdr10.hlsl index 546b680cd..c96a4f0dd 100644 --- a/src/windows/gfx/shaders/d3d11/hdr10.hlsl +++ b/src/windows/gfx/shaders/d3d11/hdr10.hlsl @@ -38,7 +38,7 @@ float3 rec2020_linear_to_rec2020_pq(float3 color, float sdr_max_nits) // Apply the inverse of the PQ EOTF (SMPTE ST 2084-2014) in order to encode the signal as PQ // Courtesy of https://github.com/colour-science/colour/blob/38782ac059e8ddd91939f3432bf06811c16667f0/colour/models/rgb/transfer_functions/st_2084.py#L56 - float3 Y_p = 
spow3(saturate(color / HDR10_MAX_NITS) * sdr_max_nits, PQ_m_1); + float3 Y_p = spow3(max(0.0f, (color / HDR10_MAX_NITS) * sdr_max_nits), PQ_m_1); float3 N = spow3((PQ_c_1 + PQ_c_2 * Y_p) / (PQ_c_3 * Y_p + 1.0f), PQ_m_2); diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index cb7b98292..4d9e7a3af 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -78,39 +78,6 @@ inline float4 yuv_to_rgba_rec2020(float y, float u, float v) static const float SDR_MAX_NITS = 80.0f; -// This is the precomputed matrix that transforms linear rec2020 to ACES to linear sRGB/rec709. -// Computed as B * A, where -// A = [rec2020 => XYZ => D65_2_D60 => AP1 => RRT_SAT] -// 0.9411843241 0.04576699764 0.00553454759 -// 0.00737755958 0.98266607517 0.00244228163 -// 0.00989047793 0.05050263667 0.93209270519 -// and B = [ODT_SAT => XYZ => D60_2_D65 => sRGB] -// 1.60475 -0.53108 -0.07367 -// -0.10208 1.10813 -0.00605 -// -0.00327 -0.07276 1.07602 -static const float3x3 REC2020_TO_SRGB = -{ - {1.505718838, -0.452150239, -0.061082751}, - {-0.087960638, 1.083944322, -0.003497762}, - {0.007027888, -0.017306595, 1.002754594} -}; -static const float3x3 REC2020_TO_SRGB_obs = -{ - {1.6604910, -0.5876411, -0.0728499}, - {-0.087960638, 1.083944322, -0.0083494}, - {-0.0181508, -0.1005789, 1.1187297 } -}; - -inline float4 rec2020_pq_to_scrgb_linear(float4 rgba) -{ - float3 color = rgba.rgb; - - color = rec2020_pq_to_rec2020_linear(color, SDR_MAX_NITS); // the HDR10 frame has already been encoded using the ST2084 PQ curve, so we need to decode it in order to get back linear colors - color = mul(REC2020_TO_SRGB_obs, color); - - return float4(color, rgba.a); -} - inline void gaussian(uint type, float w, float h, inout float2 uv) { float2 res = float2(w, h); @@ -172,8 +139,6 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 9 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); - rgba = 
rec2020_pq_to_scrgb_linear(rgba); - } else { rgba = yuv_to_rgba(y, u, v); } @@ -186,7 +151,6 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 10 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); - rgba = rec2020_pq_to_scrgb_linear(rgba); } else { rgba = yuv_to_rgba(y, u, v); } diff --git a/src/windows/gfx/shaders/d3d11/psui.ps4 b/src/windows/gfx/shaders/d3d11/psui.ps4 index e84a08db1..6ab41a6e4 100644 --- a/src/windows/gfx/shaders/d3d11/psui.ps4 +++ b/src/windows/gfx/shaders/d3d11/psui.ps4 @@ -4,6 +4,8 @@ // If a copy of the MIT License was not distributed with this file, // You can obtain one at https://spdx.org/licenses/MIT.html. +#include "hdr10.hlsl" + struct PS_INPUT { float4 pos : SV_POSITION; float4 col : COLOR0; @@ -18,20 +20,37 @@ cbuffer VS_CONSTANT_BUFFER : register(b0) { sampler sampler0; Texture2D texture0; +static const float SDR_MAX_NITS = 80.0f; // the reference sRGB luminance is 80 nits (aka the brightness of paper white) + +// Courtesy of https://github.com/obsproject/obs-studio/pull/6157/files#diff-81ee756f47c3a2fbb9f9fa0a858d79c4da89db97d8ae79fbd643c9533fba177b +static const float3x3 REC709_TO_REC2020 = +{ + {0.6274040f, 0.3292820f, 0.0433136f}, + {0.0690970f, 0.9195400f, 0.0113612f}, + {0.0163916f, 0.0880132f, 0.8955950f} +}; + float3 srgb_to_linear(float3 color) { // Fast approximation of sRGB's transfer function return pow(abs(saturate(color)), 2.2f); } +float3 srgb_linear_to_rec2020_linear(float3 color) +{ + return mul(REC709_TO_REC2020, color); +} + float4 main(PS_INPUT input) : SV_Target { float4 ui = input.col * texture0.Sample(sampler0, input.uv); if (hdr) { float3 ui_rgb = ui.rgb; - ui_rgb = srgb_to_linear(ui_rgb); // UI texture is encoded non-linearly in sRGB whereas the render target is in scRGB linear, so we need to linearize the UI - ui_rgb *= hdr_brighten_factor; // 1.0 in scRGB is 80 nits which is the reference SDR luminance but most SDR displays will actually render 1.0 at around 200-300 nits for improved 
viewing; we mimic this by brightening the UI texture by a configurable constant + ui_rgb = srgb_to_linear(ui_rgb); // UI texture is encoded non-linearly in sRGB, so we need to first linearize it + ui_rgb = srgb_linear_to_rec2020_linear(ui_rgb); + ui_rgb *= hdr_brighten_factor; // 1.0 in sRGB is 80 nits which is the reference SDR luminance but most SDR displays will actually render 1.0 at around 200-300 nits for improved viewing; we mimic this by brightening the UI texture by a configurable constant + ui_rgb = rec2020_linear_to_rec2020_pq(ui_rgb, SDR_MAX_NITS); ui.rgb = ui_rgb; } From ce23da0b834dee82fa2061a9b378944dc11a8c5a Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 18 Apr 2022 12:35:53 -0400 Subject: [PATCH 17/28] Addressed some code review comments. - Got rid of the ternary sausage in d3d11.c reload_textures. The code for texture format determination is now more readable. - Failing to init HDR does NOT cause the d3d11 ctx to fail anymore. HDR is an optional feature. --- src/windows/gfx/d3d11-ctx.c | 71 ++++++++++++++++++++++--------------- src/windows/gfx/d3d11.c | 19 +++++++--- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index ae29b8448..47851cf29 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -9,9 +9,6 @@ GFX_CTX_PROTOTYPES(_d3d11_) #define COBJMACROS #include -#include -#include -#include #include #define DXGI_FATAL(e) ( \ @@ -44,6 +41,7 @@ struct d3d11_ctx { IDXGISwapChain4 *swap_chain4; IDXGIFactory1 *factory1; HANDLE waitable; + bool hdr_init; bool hdr_supported; bool hdr; bool composite_ui; @@ -223,8 +221,22 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) return r; } +static void d3d11_ctx_free_hdr(struct d3d11_ctx *ctx) +{ + if (ctx->swap_chain4) + IDXGISwapChain4_Release(ctx->swap_chain4); + + if (ctx->swap_chain3) + IDXGISwapChain3_Release(ctx->swap_chain3); + + ctx->swap_chain4 = NULL; + ctx->swap_chain3 = 
NULL; +} + static void d3d11_ctx_free(struct d3d11_ctx *ctx) { + d3d11_ctx_free_hdr(ctx); + if (ctx->back_buffer) ID3D11Texture2D_Release(ctx->back_buffer); @@ -234,12 +246,6 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) if (ctx->swap_chain2) IDXGISwapChain2_Release(ctx->swap_chain2); - if (ctx->swap_chain3) - IDXGISwapChain3_Release(ctx->swap_chain3); - - if (ctx->swap_chain4) - IDXGISwapChain4_Release(ctx->swap_chain4); - if (ctx->factory1) IDXGIFactory1_Release(ctx->factory1); @@ -252,8 +258,6 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) ctx->back_buffer = NULL; ctx->waitable = NULL; ctx->swap_chain2 = NULL; - ctx->swap_chain3 = NULL; - ctx->swap_chain4 = NULL; ctx->factory1 = NULL; ctx->context = NULL; ctx->device = NULL; @@ -332,18 +336,6 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) goto except; } - e = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain3, &ctx->swap_chain3); - if (e != S_OK) { - MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e); - goto except; - } - - e = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain4, &ctx->swap_chain4); - if (e != S_OK) { - MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e); - goto except; - } - ctx->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(ctx->swap_chain2); if (!ctx->waitable) { e = !S_OK; @@ -368,10 +360,29 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) if (we != WAIT_OBJECT_0) MTY_Log("'WaitForSingleObjectEx' failed with error 0x%X", we); - d3d11_ctx_refresh_window_bounds(ctx); + // HDR init + HRESULT e_hdr = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain3, &ctx->swap_chain3); + if (e_hdr != S_OK) { + MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e_hdr); + goto except_hdr; + } + + e_hdr = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain4, &ctx->swap_chain4); + if (e_hdr != S_OK) { + MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", 
e_hdr); + goto except_hdr; + } + + ctx->hdr_init = true; + d3d11_ctx_refresh_window_bounds(ctx); ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + except_hdr: + + if (e_hdr != S_OK) + d3d11_ctx_free_hdr(ctx); + except: if (swap_chain1) @@ -625,11 +636,13 @@ bool mty_d3d11_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; - if ( - d3d11_ctx_refresh_window_bounds(ctx) // check whether window was moved to another screen - || !ctx->factory1 - || !IDXGIFactory1_IsCurrent(ctx->factory1) // display adapter reset for a variety of reasons - ) { + if (!ctx->hdr_init) { + ctx->hdr_supported = false; + } else if ( + d3d11_ctx_refresh_window_bounds(ctx) // check whether window was moved to another screen + || !ctx->factory1 + || !IDXGIFactory1_IsCurrent(ctx->factory1) // display adapter reset for a variety of reasons + ) { ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); } diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index d6fe38b1e..1f3f32c2c 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -282,10 +282,21 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_BGRA5551: case MTY_COLOR_FORMAT_RGB10A2: case MTY_COLOR_FORMAT_RGBA16F: { - DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_BGR565 ? DXGI_FORMAT_B5G6R5_UNORM : - desc->format == MTY_COLOR_FORMAT_BGRA5551 ? DXGI_FORMAT_B5G5R5A1_UNORM : - desc->format == MTY_COLOR_FORMAT_RGB10A2 ? DXGI_FORMAT_R10G10B10A2_UNORM : - desc->format == MTY_COLOR_FORMAT_RGBA16F ? 
DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; + DXGI_FORMAT format = DXGI_FORMAT_B8G8R8A8_UNORM; + switch (desc->format) { + case MTY_COLOR_FORMAT_BGR565: + format = DXGI_FORMAT_B5G6R5_UNORM; + break; + case MTY_COLOR_FORMAT_BGRA5551: + format = DXGI_FORMAT_B5G5R5A1_UNORM; + break; + case MTY_COLOR_FORMAT_RGB10A2: + format = DXGI_FORMAT_R10G10B10A2_UNORM; + break; + case MTY_COLOR_FORMAT_RGBA16F: + format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_RGB10A2 || desc->format == MTY_COLOR_FORMAT_AYUV) ? 4 : 2; if (format == DXGI_FORMAT_R16G16B16A16_FLOAT) bpp = 8; From 86de9d6f42e95c165a028181cc1da9d123655f98 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Sat, 23 Apr 2022 07:37:49 -0400 Subject: [PATCH 18/28] Added support for textures of format Y410. This is a 10-bit packed 4:4:4 video texture format (10 bits each for YUV, 2 bits for alpha, similar to the RGB10A2 structure). --- src/matoya.h | 9 +++++---- src/windows/gfx/d3d11.c | 4 +++- src/windows/gfx/shaders/d3d11/ps.ps4 | 23 ++++++++++++++--------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 0669d6094..5ed921284 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -70,11 +70,12 @@ typedef enum { MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. - MTY_COLOR_FORMAT_P016 = 9, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane + MTY_COLOR_FORMAT_Y410 = 9, ///< 4:4:4 full W/H interleaved Y, U, V. 10-bit YUV, 2-bit alpha. + MTY_COLOR_FORMAT_P016 = 10, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane ///< followed by a half W/H V plane. Supports 10-bit to 16-bit data. - MTY_COLOR_FORMAT_I444_16 = 10, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 10-bit to 16-bit data. 
- MTY_COLOR_FORMAT_RGB10A2 = 11, ///< 10-bits per RGB channels, 2-bit alpha. - MTY_COLOR_FORMAT_RGBA16F = 12, ///< 16-bits floating-point precision per channel RGBA. + MTY_COLOR_FORMAT_I444_16 = 11, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 10-bit to 16-bit data. + MTY_COLOR_FORMAT_RGB10A2 = 12, ///< 10-bits per RGB channels, 2-bit alpha. + MTY_COLOR_FORMAT_RGBA16F = 13, ///< 16-bits floating-point precision per channel RGBA. MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, } MTY_ColorFormat; diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 1f3f32c2c..88fe261ea 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -278,6 +278,7 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID switch (desc->format) { case MTY_COLOR_FORMAT_BGRA: case MTY_COLOR_FORMAT_AYUV: + case MTY_COLOR_FORMAT_Y410: case MTY_COLOR_FORMAT_BGR565: case MTY_COLOR_FORMAT_BGRA5551: case MTY_COLOR_FORMAT_RGB10A2: @@ -290,6 +291,7 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_BGRA5551: format = DXGI_FORMAT_B5G5R5A1_UNORM; break; + case MTY_COLOR_FORMAT_Y410: // according to MSDN, the view format of Y410 is RGB10A2, just like how that of AYUV is BGRA8 case MTY_COLOR_FORMAT_RGB10A2: format = DXGI_FORMAT_R10G10B10A2_UNORM; break; @@ -297,7 +299,7 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID format = DXGI_FORMAT_R16G16B16A16_FLOAT; break; } - uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_RGB10A2 || desc->format == MTY_COLOR_FORMAT_AYUV) ? 4 : 2; + uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_RGB10A2 || desc->format == MTY_COLOR_FORMAT_AYUV || desc->format == MTY_COLOR_FORMAT_Y410) ? 
4 : 2; if (format == DXGI_FORMAT_R16G16B16A16_FLOAT) bpp = 8; diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index 4d9e7a3af..1aa667ff9 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -132,40 +132,45 @@ float4 main(VS_OUTPUT input) : SV_TARGET gaussian(filter, width, height, uv); // NV12, NV16, P016 - if (format == 2 || format == 5 || format == 9) { + if (format == 2 || format == 5 || format == 10) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex1.Sample(ss, uv).g; - if (format == 9 && colorspace == 3) { + if (format == 10 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); } else { rgba = yuv_to_rgba(y, u, v); } // I420, I444, I444_16 - } else if (format == 3 || format == 4 || format == 10) { + } else if (format == 3 || format == 4 || format == 11) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex2.Sample(ss, uv).r; - if (format == 10 && colorspace == 3) { + if (format == 11 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); } else { rgba = yuv_to_rgba(y, u, v); } - // AYUV - } else if (format == 8) { + // AYUV, Y410 + } else if (format == 8 || format == 9) { float y = tex0.Sample(ss, uv).r; float u = tex0.Sample(ss, uv).g; float v = tex0.Sample(ss, uv).b; - rgba = yuv_to_rgba(y, u, v); + if (format == 9 && colorspace == 3) { + // Note the reordering of yuv components here...RGBA maps to UYVA according to https://docs.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format + rgba = yuv_to_rgba_rec2020(u, y, v); + } else { + rgba = yuv_to_rgba(y, u, v); + } // TODO: This section is a bit obsolete I think....re-evaluate if its needed // RGB10A2, either sRGB or HDR10 - } else if (format == 11 && (colorspace == 1 || colorspace == 3)) { + } else if (format == 12 && (colorspace == 1 || colorspace == 3)) { if (colorspace == 1) { // sRGB - do nothing different rgba = tex0.Sample(ss, 
uv); @@ -177,7 +182,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET } // RGBA16F, scRGB linear - } else if (format == 12 && colorspace == 2) { + } else if (format == 13 && colorspace == 2) { rgba = tex0.Sample(ss, uv); // BGRA From 2eec3eccf8f0dc2e4e6fd8956345bed6f95bf606 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Tue, 26 Apr 2022 17:47:40 -0400 Subject: [PATCH 19/28] Added support for textures of format I444 10-bit. This is a planar 4:4:4 video texture format supporting 10-bit data, albeit in a 16-bit per planar pixel layout. --- src/matoya.h | 7 ++++--- src/windows/gfx/d3d11.c | 5 +++-- src/windows/gfx/shaders/d3d11/ps.ps4 | 19 ++++++++++++++----- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 5ed921284..0682df0d9 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -73,9 +73,10 @@ typedef enum { MTY_COLOR_FORMAT_Y410 = 9, ///< 4:4:4 full W/H interleaved Y, U, V. 10-bit YUV, 2-bit alpha. MTY_COLOR_FORMAT_P016 = 10, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane ///< followed by a half W/H V plane. Supports 10-bit to 16-bit data. - MTY_COLOR_FORMAT_I444_16 = 11, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 10-bit to 16-bit data. - MTY_COLOR_FORMAT_RGB10A2 = 12, ///< 10-bits per RGB channels, 2-bit alpha. - MTY_COLOR_FORMAT_RGBA16F = 13, ///< 16-bits floating-point precision per channel RGBA. + MTY_COLOR_FORMAT_I444_10 = 11, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 10-bit data. + MTY_COLOR_FORMAT_I444_16 = 12, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 16-bit data. + MTY_COLOR_FORMAT_RGB10A2 = 13, ///< 10-bits per RGB channels, 2-bit alpha. + MTY_COLOR_FORMAT_RGBA16F = 14, ///< 16-bits floating-point precision per channel RGBA. 
MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, } MTY_ColorFormat; diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 88fe261ea..9a4cf9571 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -338,10 +338,11 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID } case MTY_COLOR_FORMAT_I420: case MTY_COLOR_FORMAT_I444: + case MTY_COLOR_FORMAT_I444_10: case MTY_COLOR_FORMAT_I444_16: { const uint32_t div = desc->format == MTY_COLOR_FORMAT_I420 ? 2 : 1; - const DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_I444_16 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM; - const uint8_t bpp = desc->format == MTY_COLOR_FORMAT_I444_16 ? 2 : 1; + const DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_I444_10 || desc->format == MTY_COLOR_FORMAT_I444_16 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM; + const uint8_t bpp = format == DXGI_FORMAT_R16_UNORM ? 2 : 1; // Y HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format, desc->cropWidth, desc->cropHeight); diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index 1aa667ff9..612c0fa92 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -143,13 +143,22 @@ float4 main(VS_OUTPUT input) : SV_TARGET rgba = yuv_to_rgba(y, u, v); } - // I420, I444, I444_16 - } else if (format == 3 || format == 4 || format == 11) { + // I420, I444, I444_10, I444_16 + } else if (format == 3 || format == 4 || format == 11 || format == 12) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex2.Sample(ss, uv).r; - if (format == 11 && colorspace == 3) { + // I444_10 data contain 10-bits per planar pixel but stored in 16-bit UNORM textures. + // DXGI will normalize these 10-bit values by dividing by 2^16 but this is wrong - we need them to be divided by 2^10 instead. + // We correct for this by renormalizing the values here. Note that 2^16 / 2^10 = 64. 
+ if (format == 11) { + y = y * 64.0f; + u = u * 64.0f; + v = v * 64.0f; + } + + if ((format == 11 || format == 12) && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); } else { rgba = yuv_to_rgba(y, u, v); @@ -170,7 +179,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET // TODO: This section is a bit obsolete I think....re-evaluate if its needed // RGB10A2, either sRGB or HDR10 - } else if (format == 12 && (colorspace == 1 || colorspace == 3)) { + } else if (format == 13 && (colorspace == 1 || colorspace == 3)) { if (colorspace == 1) { // sRGB - do nothing different rgba = tex0.Sample(ss, uv); @@ -182,7 +191,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET } // RGBA16F, scRGB linear - } else if (format == 13 && colorspace == 2) { + } else if (format == 14 && colorspace == 2) { rgba = tex0.Sample(ss, uv); // BGRA From 64203d91b717a228cde84ffeb4a2a6564b1b6c5d Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 2 May 2022 16:32:11 -0400 Subject: [PATCH 20/28] Fixed linker errors for platforms using OpenGL. 
--- src/unix/apple/macosx/gfx/gl-ctx.m | 5 +++++ src/unix/linux/android/gfx/gl-ctx.c | 5 +++++ src/unix/linux/generic/gfx/gl-ctx.c | 5 +++++ src/unix/web/gfx/gl-ctx.c | 5 +++++ src/windows/gfx/gl-ctx.c | 2 +- 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/unix/apple/macosx/gfx/gl-ctx.m b/src/unix/apple/macosx/gfx/gl-ctx.m index 9551a6b21..40fd9aa0f 100644 --- a/src/unix/apple/macosx/gfx/gl-ctx.m +++ b/src/unix/apple/macosx/gfx/gl-ctx.m @@ -190,3 +190,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return true; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/linux/android/gfx/gl-ctx.c b/src/unix/linux/android/gfx/gl-ctx.c index 2dd4a5e2d..ea9454125 100644 --- a/src/unix/linux/android/gfx/gl-ctx.c +++ b/src/unix/linux/android/gfx/gl-ctx.c @@ -347,3 +347,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return r; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/linux/generic/gfx/gl-ctx.c b/src/unix/linux/generic/gfx/gl-ctx.c index 267cc2093..66849a181 100644 --- a/src/unix/linux/generic/gfx/gl-ctx.c +++ b/src/unix/linux/generic/gfx/gl-ctx.c @@ -163,3 +163,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return r; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/web/gfx/gl-ctx.c b/src/unix/web/gfx/gl-ctx.c index 0f5c8327e..0897c012c 100644 --- a/src/unix/web/gfx/gl-ctx.c +++ b/src/unix/web/gfx/gl-ctx.c @@ -101,3 +101,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/windows/gfx/gl-ctx.c b/src/windows/gfx/gl-ctx.c index f0cc9350b..6886d7176 100644 --- a/src/windows/gfx/gl-ctx.c +++ b/src/windows/gfx/gl-ctx.c @@ -203,4 +203,4 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, 
bool current) bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) { return false; -} \ No newline at end of file +} From 4f4af72f71da060a39afda8434266fc9b643b2a0 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 2 May 2022 18:45:27 -0400 Subject: [PATCH 21/28] Fixed linker errors for macOS/Metal. --- src/unix/apple/macosx/gfx/metal-ctx.m | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/unix/apple/macosx/gfx/metal-ctx.m b/src/unix/apple/macosx/gfx/metal-ctx.m index ccdff6e26..30df5a408 100644 --- a/src/unix/apple/macosx/gfx/metal-ctx.m +++ b/src/unix/apple/macosx/gfx/metal-ctx.m @@ -183,3 +183,8 @@ bool mty_metal_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_metal_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} \ No newline at end of file From d6922318fd114aef1979b36c195ebfcdc21ad549 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 9 May 2022 15:08:13 -0400 Subject: [PATCH 22/28] Apply suggestions from code review Co-authored-by: Ronald Huveneers --- src/matoya.h | 2 +- src/windows/gfx/shaders/d3d11/hdr10.hlsl | 6 +++--- src/windows/gfx/shaders/d3d11/ps.ps4 | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 0682df0d9..5ffff7d61 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -86,7 +86,7 @@ typedef enum { MTY_COLOR_SPACE_SRGB = 1, ///< sRGB/rec709 primaries and a non-linear transfer function (approx gamma curve of 2.2). Supported by all color formats. MTY_COLOR_SPACE_SCRGB_LINEAR = 2, ///< Microsoft's scRGB wide gamut color space which is based on sRGB/rec709 primaries and has a linear transfer function. Only supported by color format MTY_COLOR_FORMAT_RGBA16F. MTY_COLOR_SPACE_HDR10 = 3, ///< Uses the rec2020 color primaries and the rec2100 non-linear transfer function (ST 2084 perceptual quantizer, aka PQ). Only supported by color format MTY_COLOR_FORMAT_RGB10A2. 
- MTY_COLOR_SPACE_MAKE_32 = INT32_MAX, + MTY_COLOR_SPACE_MAKE_32 = INT32_MAX, } MTY_ColorSpace; /// @brief Quad texture filtering. diff --git a/src/windows/gfx/shaders/d3d11/hdr10.hlsl b/src/windows/gfx/shaders/d3d11/hdr10.hlsl index c96a4f0dd..6ba16dffb 100644 --- a/src/windows/gfx/shaders/d3d11/hdr10.hlsl +++ b/src/windows/gfx/shaders/d3d11/hdr10.hlsl @@ -38,9 +38,9 @@ float3 rec2020_linear_to_rec2020_pq(float3 color, float sdr_max_nits) // Apply the inverse of the PQ EOTF (SMPTE ST 2084-2014) in order to encode the signal as PQ // Courtesy of https://github.com/colour-science/colour/blob/38782ac059e8ddd91939f3432bf06811c16667f0/colour/models/rgb/transfer_functions/st_2084.py#L56 - float3 Y_p = spow3(max(0.0f, (color / HDR10_MAX_NITS) * sdr_max_nits), PQ_m_1); + float3 Y_p = spow3(max(0.0f, (color / HDR10_MAX_NITS) * sdr_max_nits), PQ_m_1); - float3 N = spow3((PQ_c_1 + PQ_c_2 * Y_p) / (PQ_c_3 * Y_p + 1.0f), PQ_m_2); + float3 N = spow3((PQ_c_1 + PQ_c_2 * Y_p) / (PQ_c_3 * Y_p + 1.0f), PQ_m_2); - return N; + return N; } diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index 612c0fa92..be15d86c8 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -139,6 +139,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 10 && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); + } else { rgba = yuv_to_rgba(y, u, v); } @@ -160,6 +161,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if ((format == 11 || format == 12) && colorspace == 3) { rgba = yuv_to_rgba_rec2020(y, u, v); + } else { rgba = yuv_to_rgba(y, u, v); } From ad3b0ef7c98dd48df4d4af673d82ff99e438edca Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 9 May 2022 15:09:29 -0400 Subject: [PATCH 23/28] Addressed some code review comments. 
--- src/matoya.h | 1 + src/unix/apple/macosx/gfx/metal-ctx.m | 2 +- src/windows/gfx/d3d11.c | 55 ++++++++++++++++++--------- src/windows/gfx/d3d12-ctx.c | 2 +- src/windows/gfx/shaders/d3d11/ps.ps4 | 8 ++-- 5 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index 5ffff7d61..bab06c430 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -1292,6 +1292,7 @@ MTY_WindowGetContextState(MTY_App *app, MTY_Window window); /// @brief Tells you whether the window can show HDR content or not. /// @param app The MTY_App. /// @param window An MTY_Window. +/// @returns Returns true if the window can render HDR content, false otherwise. MTY_EXPORT bool MTY_WindowIsHDRSupported(MTY_App *app, MTY_Window window); diff --git a/src/unix/apple/macosx/gfx/metal-ctx.m b/src/unix/apple/macosx/gfx/metal-ctx.m index 30df5a408..7478edc83 100644 --- a/src/unix/apple/macosx/gfx/metal-ctx.m +++ b/src/unix/apple/macosx/gfx/metal-ctx.m @@ -187,4 +187,4 @@ bool mty_metal_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) bool mty_metal_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) { return false; -} \ No newline at end of file +} diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 9a4cf9571..d844ef020 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -272,6 +272,39 @@ static HRESULT d3d11_crop_copy(ID3D11DeviceContext *context, ID3D11Resource *tex return e; } +static void d3d11_get_dxgi_format_and_bpp(const MTY_ColorFormat format, DXGI_FORMAT *out_format, uint8_t *out_bpp) +{ + DXGI_FORMAT result = DXGI_FORMAT_B8G8R8A8_UNORM; + switch (format) { + case MTY_COLOR_FORMAT_BGR565: + result = DXGI_FORMAT_B5G6R5_UNORM; + break; + case MTY_COLOR_FORMAT_BGRA5551: + result = DXGI_FORMAT_B5G5R5A1_UNORM; + break; + case MTY_COLOR_FORMAT_Y410: // according to MSDN, the view format of Y410 is RGB10A2, just like how that of AYUV is BGRA8 + case MTY_COLOR_FORMAT_RGB10A2: + result = DXGI_FORMAT_R10G10B10A2_UNORM; + break; + case 
MTY_COLOR_FORMAT_RGBA16F: + result = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } + *out_format = result; // the DXGI format selected above, not the MTY enum value + + uint8_t bpp = 2; + switch (format) { + case MTY_COLOR_FORMAT_BGRA: + case MTY_COLOR_FORMAT_RGB10A2: + case MTY_COLOR_FORMAT_AYUV: + case MTY_COLOR_FORMAT_Y410: + bpp = 4; break; // 32-bit formats must not fall through into the 64-bit RGBA16F case + case MTY_COLOR_FORMAT_RGBA16F: + bpp = 8; + } + *out_bpp = bpp; +} + static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID3D11DeviceContext *context, const void *image, const MTY_RenderDesc *desc) { @@ -283,25 +316,9 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_BGRA5551: case MTY_COLOR_FORMAT_RGB10A2: case MTY_COLOR_FORMAT_RGBA16F: { - DXGI_FORMAT format = DXGI_FORMAT_B8G8R8A8_UNORM; - switch (desc->format) { - case MTY_COLOR_FORMAT_BGR565: - format = DXGI_FORMAT_B5G6R5_UNORM; - break; - case MTY_COLOR_FORMAT_BGRA5551: - format = DXGI_FORMAT_B5G5R5A1_UNORM; - break; - case MTY_COLOR_FORMAT_Y410: // according to MSDN, the view format of Y410 is RGB10A2, just like how that of AYUV is BGRA8 - case MTY_COLOR_FORMAT_RGB10A2: - format = DXGI_FORMAT_R10G10B10A2_UNORM; - break; - case MTY_COLOR_FORMAT_RGBA16F: - format = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - } - uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_RGB10A2 || desc->format == MTY_COLOR_FORMAT_AYUV || desc->format == MTY_COLOR_FORMAT_Y410) ? 
4 : 2; - if (format == DXGI_FORMAT_R16G16B16A16_FLOAT) - bpp = 8; + DXGI_FORMAT format = DXGI_FORMAT_B8G8R8X8_UNORM; + uint8_t bpp = 4; + d3d11_get_dxgi_format_and_bpp(desc->format, &format, &bpp); // BGRA HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format, desc->cropWidth, desc->cropHeight); diff --git a/src/windows/gfx/d3d12-ctx.c b/src/windows/gfx/d3d12-ctx.c index fe76c6c55..a23fe46c7 100644 --- a/src/windows/gfx/d3d12-ctx.c +++ b/src/windows/gfx/d3d12-ctx.c @@ -562,7 +562,7 @@ bool mty_d3d12_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) bool mty_d3d12_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) { - // TODO + // XXX: Write this when we implement D3D12 support for HDR return false; } diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index be15d86c8..a24bf4839 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -62,9 +62,9 @@ inline float4 yuv_to_rgba_rec2020(float y, float u, float v) u = u - C_minmax_2; v = v - C_minmax_2; - y = y * 1.0f / Y_minmax_dt; - u = u * 1.0f / C_minmax_dt; - v = v * 1.0f / C_minmax_dt; + y = y / Y_minmax_dt; + u = u / C_minmax_dt; + v = v / C_minmax_dt; const float K_r = 0.2627f; const float K_b = 0.0593f; @@ -175,6 +175,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 9 && colorspace == 3) { // Note the reordering of yuv components here...RGBA maps to UYVA according to https://docs.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format rgba = yuv_to_rgba_rec2020(u, y, v); + } else { rgba = yuv_to_rgba(y, u, v); } @@ -185,6 +186,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (colorspace == 1) { // sRGB - do nothing different rgba = tex0.Sample(ss, uv); + } else { // HDR10 // TODO: Do we need to do a st 2084 encoding/decoding? 
From b6a2dced613a7ffe51c7fb0e105e5a2aa81e44f3 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 9 May 2022 16:43:19 -0400 Subject: [PATCH 24/28] Apply suggestions from code review - 2 Co-authored-by: Ronald Huveneers --- src/windows/gfx/d3d11-ctx.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 47851cf29..676482b74 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -64,12 +64,11 @@ static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorForma DXGI_COLOR_SPACE_TYPE colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; // Use the last known value if unspecified - if (format == MTY_COLOR_FORMAT_UNKNOWN) { + if (format == MTY_COLOR_FORMAT_UNKNOWN) format_new = ctx->format; - } - if (colorspace == MTY_COLOR_SPACE_UNKNOWN) { + + if (colorspace == MTY_COLOR_SPACE_UNKNOWN) colorspace_new = ctx->colorspace; - } switch (format) { case MTY_COLOR_FORMAT_RGBA16F: @@ -169,9 +168,7 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) float best_intersect_area = -1; IDXGIAdapter1 *adapter1 = NULL; for (UINT j = 0; IDXGIFactory1_EnumAdapters1(ctx->factory1, j, &adapter1) != DXGI_ERROR_NOT_FOUND; j++) { - for (UINT i = 0; IDXGIAdapter1_EnumOutputs(adapter1, i, &current_output) != DXGI_ERROR_NOT_FOUND; i++) { - // Get the rectangle bounds of current output DXGI_OUTPUT_DESC desc = {0}; e = IDXGIOutput_GetDesc(current_output, &desc); @@ -209,6 +206,7 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) e = IDXGIOutput6_GetDesc1(output6, &desc1); if (e != S_OK) { MTY_Log("'IDXGIOutput6_GetDesc1' failed with HRESULT 0x%X", e); + } else { r = desc1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; // this is the canonical check according to MSDN and NVIDIA } @@ -492,6 +490,7 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) d3d11_ctx_free(ctx); d3d11_ctx_init(ctx); } + } else if (DXGI_FATAL(e)) { 
MTY_Log("'IDXGISwapChain2_ResizeBuffers' failed with HRESULT 0x%X", e); d3d11_ctx_free(ctx); @@ -516,9 +515,8 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) hdr_desc.MaxFrameAverageLightLevel = (UINT16) ctx->hdr_desc.max_frame_average_light_level; HRESULT e = IDXGISwapChain4_SetHDRMetaData(ctx->swap_chain4, DXGI_HDR_METADATA_TYPE_HDR10, sizeof(hdr_desc), &hdr_desc); - if (e != S_OK) { + if (e != S_OK) MTY_Log("Unable to set HDR metadata: 'IDXGISwapChain4_SetHDRMetaData' failed with HRESULT 0x%X", e); - } } } From 0506f616c844c7c8607422cc8ab02223833f61b5 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 9 May 2022 16:43:49 -0400 Subject: [PATCH 25/28] Addressed more code review comments, including a mem leak fix. --- makefile | 4 ++++ src/windows/gfx/d3d11-ctx.c | 45 ++++++++++++++++++++++--------------- src/windows/gfx/d3d11.c | 2 +- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/makefile b/makefile index ddbafdf60..024024acd 100644 --- a/makefile +++ b/makefile @@ -96,6 +96,10 @@ DEFS = \ -DUNICODE \ -DWIN32_LEAN_AND_MEAN +!IFDEF D3D11_CTX_DEBUG +DEFS = $(DEFS) -DD3D11_CTX_DEBUG +!ENDIF + FXCFLAGS = \ /O3 \ /Ges \ diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index 676482b74..ce3a19827 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -23,6 +23,9 @@ GFX_CTX_PROTOTYPES(_d3d11_) ) #define D3D11_CTX_WAIT 2000 +#ifndef D3D11_CTX_DEBUG + #define D3D11_CTX_DEBUG false +#endif struct d3d11_ctx { HWND hwnd; @@ -122,12 +125,12 @@ static bool d3d11_ctx_refresh_window_bounds(struct d3d11_ctx *ctx) RECT window_bounds_new = {0}; GetWindowRect(ctx->hwnd, &window_bounds_new); - LONG dt_left = window_bounds_new.left - ctx->window_bounds.left; - LONG dt_top = window_bounds_new.top - ctx->window_bounds.top; - LONG dt_right = window_bounds_new.right - ctx->window_bounds.right; - LONG dt_bottom = window_bounds_new.bottom - ctx->window_bounds.bottom; + const LONG dt_left = window_bounds_new.left 
- ctx->window_bounds.left; + const LONG dt_top = window_bounds_new.top - ctx->window_bounds.top; + const LONG dt_right = window_bounds_new.right - ctx->window_bounds.right; + const LONG dt_bottom = window_bounds_new.bottom - ctx->window_bounds.bottom; - changed = labs(dt_left) || labs(dt_top) || labs(dt_right) || labs(dt_bottom); + changed = dt_left || dt_top || dt_right || dt_bottom; ctx->window_bounds = window_bounds_new; @@ -165,7 +168,7 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) // Go through the outputs of each and every adapter IDXGIOutput *current_output = NULL; IDXGIOutput *best_output = NULL; - float best_intersect_area = -1; + LONG best_intersect_area = -1; IDXGIAdapter1 *adapter1 = NULL; for (UINT j = 0; IDXGIFactory1_EnumAdapters1(ctx->factory1, j, &adapter1) != DXGI_ERROR_NOT_FOUND; j++) { for (UINT i = 0; IDXGIAdapter1_EnumOutputs(adapter1, i, &current_output) != DXGI_ERROR_NOT_FOUND; i++) { @@ -182,10 +185,15 @@ static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) const LONG by2 = output_bounds.bottom; // Compute the intersection and see if its the best fit - const LONG intersect_area = max(0, min(ax2, bx2) - max(ax1, bx1)) * max(0, min(ay2, by2) - max(ay1, by1)); // courtesy of https://github.com/microsoft/DirectX-Graphics-Samples/blob/c79f839da1bb2db77d2306be5e4e664a5d23a36b/Samples/Desktop/D3D12HDR/src/D3D12HDR.cpp#L1046 + // Courtesy of https://github.com/microsoft/DirectX-Graphics-Samples/blob/c79f839da1bb2db77d2306be5e4e664a5d23a36b/Samples/Desktop/D3D12HDR/src/D3D12HDR.cpp#L1046 + const LONG intersect_area = max(0, min(ax2, bx2) - max(ax1, bx1)) * max(0, min(ay2, by2) - max(ay1, by1)); if (intersect_area > best_intersect_area) { + if (best_output != NULL) + IDXGIOutput_Release(best_output); + best_output = current_output; - best_intersect_area = (float) intersect_area; // not sure why but the MSDN sample stores this as float when its all integer math...it works though! 
+ best_intersect_area = intersect_area; + } else { IDXGIOutput_Release(current_output); } @@ -273,7 +281,7 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) ctx->colorspace = MTY_COLOR_SPACE_SRGB; DXGI_SWAP_CHAIN_DESC1 sd = {0}; - sd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + sd.Format = DXGI_FORMAT_B8G8R8A8_UNORM; sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; sd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; sd.SampleDesc.Count = 1; @@ -282,9 +290,8 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) D3D_FEATURE_LEVEL levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; UINT flags = 0; - #ifdef DEBUG + if (D3D11_CTX_DEBUG) flags |= D3D11_CREATE_DEVICE_DEBUG; - #endif HRESULT e = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, levels, sizeof(levels) / sizeof(D3D_FEATURE_LEVEL), D3D11_SDK_VERSION, &ctx->device, NULL, &ctx->context); if (e != S_OK) { @@ -454,8 +461,9 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) d3d11_ctx_get_size(ctx, &width, &height); if (ctx->width != width || ctx->height != height) { + // DXGI_FORMAT_UNKNOWN will resize without changing the existing format HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, - DXGI_FORMAT_UNKNOWN, D3D11_SWFLAGS); // unknown format will resize without changing the existing format + DXGI_FORMAT_UNKNOWN, D3D11_SWFLAGS); if (e == S_OK) { ctx->width = width; @@ -636,12 +644,13 @@ bool mty_d3d11_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) if (!ctx->hdr_init) { ctx->hdr_supported = false; - } else if ( - d3d11_ctx_refresh_window_bounds(ctx) // check whether window was moved to another screen - || !ctx->factory1 - || !IDXGIFactory1_IsCurrent(ctx->factory1) // display adapter reset for a variety of reasons - ) { - ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + + } else { + const bool adapter_reset = !ctx->factory1 || !IDXGIFactory1_IsCurrent(ctx->factory1); + const bool window_moved = d3d11_ctx_refresh_window_bounds(ctx); // includes when moved to different 
display + if (window_moved || adapter_reset) { + ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + } } return ctx->hdr_supported; diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index d844ef020..9de3a8807 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -316,7 +316,7 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID case MTY_COLOR_FORMAT_BGRA5551: case MTY_COLOR_FORMAT_RGB10A2: case MTY_COLOR_FORMAT_RGBA16F: { - DXGI_FORMAT format = DXGI_FORMAT_B8G8R8X8_UNORM; + DXGI_FORMAT format = DXGI_FORMAT_B8G8R8A8_UNORM; uint8_t bpp = 4; d3d11_get_dxgi_format_and_bpp(desc->format, &format, &bpp); From 61684cf3d566a2a70ff7717f2299dea7715c3ad0 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Tue, 10 May 2022 11:58:54 -0400 Subject: [PATCH 26/28] Addressed further review comment. Namely, cleaned up the format/colorspace validation code in d3d11-ctx.c. --- src/windows/gfx/d3d11-ctx.c | 113 +++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index ce3a19827..006ecca6c 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -27,6 +27,50 @@ GFX_CTX_PROTOTYPES(_d3d11_) #define D3D11_CTX_DEBUG false #endif + +static const MTY_ColorSpace MTY_FORMAT_TO_EXPECTED_MTY_COLORSPACE[] = { + [MTY_COLOR_FORMAT_UNKNOWN] = MTY_COLOR_SPACE_UNKNOWN, + [MTY_COLOR_FORMAT_BGRA] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_NV12] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_I420] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_I444] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_NV16] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_BGR565] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_BGRA5551] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_AYUV] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_Y410] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_P016] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_I444_10] = 
MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_I444_16] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_RGB10A2] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_RGBA16F] = MTY_COLOR_SPACE_SCRGB_LINEAR, +}; + +static const DXGI_FORMAT MTY_FORMAT_TO_DXGI_FORMAT[] = { + [MTY_COLOR_FORMAT_UNKNOWN] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_BGRA] = DXGI_FORMAT_B8G8R8A8_UNORM, + [MTY_COLOR_FORMAT_NV12] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I420] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I444] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_NV16] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_BGR565] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_BGRA5551] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_AYUV] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_Y410] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_P016] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I444_10] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I444_16] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_RGB10A2] = DXGI_FORMAT_R10G10B10A2_UNORM, + [MTY_COLOR_FORMAT_RGBA16F] = DXGI_FORMAT_R16G16B16A16_FLOAT, +}; + +static const DXGI_COLOR_SPACE_TYPE MTY_COLORSPACE_TO_DXGI_COLORSPACE[] = { + [MTY_COLOR_SPACE_UNKNOWN] = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, + [MTY_COLOR_SPACE_SRGB] = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, + [MTY_COLOR_SPACE_SCRGB_LINEAR] = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, + [MTY_COLOR_SPACE_HDR10] = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, +}; + struct d3d11_ctx { HWND hwnd; uint32_t width; @@ -63,55 +107,20 @@ static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorFormat format, MTY_ColorSpace colorspace, DXGI_FORMAT *format_out, DXGI_COLOR_SPACE_TYPE *colorspace_out) { - DXGI_FORMAT format_new = DXGI_FORMAT_R8G8B8A8_UNORM; - DXGI_COLOR_SPACE_TYPE colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; - - // Use the last known value if unspecified - if (format == MTY_COLOR_FORMAT_UNKNOWN) - format_new = ctx->format; 
- - if (colorspace == MTY_COLOR_SPACE_UNKNOWN) - colorspace_new = ctx->colorspace; - - switch (format) { - case MTY_COLOR_FORMAT_RGBA16F: - format_new = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - case MTY_COLOR_FORMAT_RGB10A2: - format_new = DXGI_FORMAT_R10G10B10A2_UNORM; - break; - } - - switch (colorspace) { - case MTY_COLOR_SPACE_SRGB: - colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; - break; - case MTY_COLOR_SPACE_SCRGB_LINEAR: - colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709; - break; - case MTY_COLOR_SPACE_HDR10: - colorspace_new = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; - break; - } - - // Ensure that the format and colorspace are a valid pairing - switch (colorspace_new) { - case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: { - if (format_new != DXGI_FORMAT_R16G16B16A16_FLOAT) { - MTY_Log("Format 0x%X is not meant for colorspace 0x%X. Forcing format to 0x%X.", format_new, DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, DXGI_FORMAT_R16G16B16A16_FLOAT); - format_new = DXGI_FORMAT_R16G16B16A16_FLOAT; - } - break; + // Default to last known values + DXGI_FORMAT format_new = ctx->format; + DXGI_COLOR_SPACE_TYPE colorspace_new = ctx->colorspace; + + if (format != MTY_COLOR_FORMAT_UNKNOWN && colorspace != MTY_COLOR_SPACE_UNKNOWN) { + // Align the color space to the given format + MTY_ColorSpace colorspace_expected = MTY_FORMAT_TO_EXPECTED_MTY_COLORSPACE[format]; + if (colorspace_expected != colorspace) { + MTY_Log("Expected MTY colorspace 0x%X for MTY format 0x%X but found 0x%X. Forcing colorspace to 0x%X.", colorspace_expected, format, colorspace, colorspace_expected); + colorspace = colorspace_expected; } - case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: { - if (format_new != DXGI_FORMAT_R10G10B10A2_UNORM) { - MTY_Log("Format 0x%X is not meant for colorspace 0x%X. 
Forcing format to 0x%X.", format_new, DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, DXGI_FORMAT_R10G10B10A2_UNORM); - format_new = DXGI_FORMAT_R10G10B10A2_UNORM; - } - break; - } - default: - break; + + format_new = MTY_FORMAT_TO_DXGI_FORMAT[format]; + colorspace_new = MTY_COLORSPACE_TO_DXGI_COLORSPACE[colorspace]; } *format_out = format_new; @@ -277,8 +286,8 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) IDXGIFactory2 *factory2 = NULL; IDXGISwapChain1 *swap_chain1 = NULL; - ctx->format = MTY_COLOR_FORMAT_BGRA; - ctx->colorspace = MTY_COLOR_SPACE_SRGB; + ctx->format = DXGI_FORMAT_B8G8R8A8_UNORM; + ctx->colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; DXGI_SWAP_CHAIN_DESC1 sd = {0}; sd.Format = DXGI_FORMAT_B8G8R8A8_UNORM; @@ -480,7 +489,7 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) bool hdr = ctx->format_new == DXGI_FORMAT_R16G16B16A16_FLOAT || ctx->colorspace_new == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; if (ctx->hdr != hdr) { - // If in HDR mode, we keep swap chain in HDR10 (rec2020 10-bit RGB + ST2084 PQ); otherwise in SDR mode, it's the standard RGBA8 sRGB + // If in HDR mode, we keep swap chain in HDR10 (rec2020 10-bit RGB + ST2084 PQ); otherwise in SDR mode, it's the standard BGRA8 sRGB DXGI_FORMAT format = hdr ? DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_B8G8R8A8_UNORM; DXGI_COLOR_SPACE_TYPE colorspace = hdr ? DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; @@ -607,7 +616,7 @@ void mty_d3d11_ctx_draw_ui(struct gfx_ctx *gfx_ctx, const MTY_DrawData *dd) MTY_DrawData dd_mutated = *dd; dd_mutated.hdr = ctx->composite_ui; - ctx->format_new = dd_mutated.hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM; + ctx->format_new = dd_mutated.hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; ctx->colorspace_new = dd_mutated.hdr ? 
DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; mty_d3d11_ctx_get_surface(gfx_ctx); From 3577ca6f1f50f667fd89540fe70477a2f56a7ead Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Fri, 13 May 2022 08:14:20 -0400 Subject: [PATCH 27/28] Updated YUV -> RGB conversion for HDR to include support for full range video. For now, HDR video will always be assumed to cover the full range. --- src/windows/gfx/shaders/d3d11/ps.ps4 | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index a24bf4839..2d31f1a1f 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -45,16 +45,16 @@ inline float4 yuv_to_rgba(float y, float u, float v) return float4(r, g, b, 1.0); } -inline float4 yuv_to_rgba_rec2020(float y, float u, float v) +inline float4 yuv_to_rgba_rec2020(float y, float u, float v, bool full_range) { // Using "RGB to YCbCr color conversion for UHDTV" (ITU-R BT.2020) // Thanks to https://github.com/colour-science/colour/blob/c3735e5d0ad67443022ece0b42b575e040eb61d1/colour/models/rgb/ycbcr.py#L472 - const float Y_min = 0.06256109f; - const float Y_max = 0.91886608f; + const float Y_min = full_range ? 0.0f : 0.06256109f; + const float Y_max = full_range ? 1.0f : 0.91886608f; const float Y_minmax_dt = Y_max - Y_min; - const float C_min = 0.06256109f; - const float C_max = 0.93841642f; + const float C_min = full_range ? 0.0f : 0.06256109f; + const float C_max = full_range ? 
1.0f : 0.93841642f; const float C_minmax_2 = (C_min + C_max) * 0.5f; const float C_minmax_dt = C_max - C_min; @@ -78,6 +78,8 @@ inline float4 yuv_to_rgba_rec2020(float y, float u, float v) static const float SDR_MAX_NITS = 80.0f; +static const bool YCBCR_FULL_RANGE = true; + inline void gaussian(uint type, float w, float h, inout float2 uv) { float2 res = float2(w, h); @@ -138,7 +140,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET float v = tex1.Sample(ss, uv).g; if (format == 10 && colorspace == 3) { - rgba = yuv_to_rgba_rec2020(y, u, v); + rgba = yuv_to_rgba_rec2020(y, u, v, YCBCR_FULL_RANGE); } else { rgba = yuv_to_rgba(y, u, v); @@ -160,7 +162,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET } if ((format == 11 || format == 12) && colorspace == 3) { - rgba = yuv_to_rgba_rec2020(y, u, v); + rgba = yuv_to_rgba_rec2020(y, u, v, YCBCR_FULL_RANGE); } else { rgba = yuv_to_rgba(y, u, v); @@ -174,7 +176,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 9 && colorspace == 3) { // Note the reordering of yuv components here...RGBA maps to UYVA according to https://docs.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format - rgba = yuv_to_rgba_rec2020(u, y, v); + rgba = yuv_to_rgba_rec2020(u, y, v, YCBCR_FULL_RANGE); } else { rgba = yuv_to_rgba(y, u, v); From b9f31394812ce4b8103125f9383f1c7daadf5582 Mon Sep 17 00:00:00 2001 From: Daniel Vijayakumar Date: Mon, 16 May 2022 16:09:28 -0400 Subject: [PATCH 28/28] Full range vs legal range color conversion can now be specified via a flag. For now, it only applies to HDR. Meaning SDR is always legal range for now. 
--- src/matoya.h | 1 + src/windows/gfx/d3d11.c | 5 ++++- src/windows/gfx/d3d12.c | 3 ++- src/windows/gfx/shaders/d3d11/ps.ps4 | 10 ++++------ 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/matoya.h b/src/matoya.h index bab06c430..4c1e955cb 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -145,6 +145,7 @@ typedef struct { float scale; ///< Multiplier applied to the dimensions of the image, producing an ///< minimized or magnified image. This can be set to 0 ///< if unnecessary. + bool fullRange; ///< If true, then the image components comprise the range [0, 1]; else, they comprise the "partial" or "legal" range based on the format of the image. bool hdrDescSpecified; ///< Is HDR metadata provided. Only relevant if format + colorspace indicate an HDR image. MTY_HDRDesc hdrDesc; ///< HDR metadata for the image. Only relevant if format + colorspace indicate an HDR image. } MTY_RenderDesc; diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 9de3a8807..fb38732e0 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -28,7 +28,9 @@ struct d3d11_psvars { uint32_t effect; uint32_t format; uint32_t colorspace; - uint32_t rotation; // Constant buffers must be in increments of 16 bytes + uint32_t rotation; + uint32_t full_range; // 1 = full range video, 0 = limited/partial range video + uint32_t __pad[3]; // Constant buffers must be in increments of 16 bytes }; struct d3d11_res { @@ -472,6 +474,7 @@ bool mty_d3d11_render(struct gfx *gfx, MTY_Device *device, MTY_Context *context, cb.format = ctx->format; cb.colorspace = desc->colorspace; cb.rotation = desc->rotation; + cb.full_range = desc->fullRange; D3D11_MAPPED_SUBRESOURCE res = {0}; e = ID3D11DeviceContext_Map(_context, ctx->psbres, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); diff --git a/src/windows/gfx/d3d12.c b/src/windows/gfx/d3d12.c index ba191aa87..2fdb0b4c8 100644 --- a/src/windows/gfx/d3d12.c +++ b/src/windows/gfx/d3d12.c @@ -31,7 +31,8 @@ struct d3d12_psvars { uint32_t 
effect; uint32_t format; uint32_t rotation; - uint32_t __pad[1]; // D3D11 needs 16-byte aligned, unsure about D3D12 + uint32_t full_range; // 1 = full range video, 0 = limited/partial range video + uint32_t __pad[3]; // D3D11 needs 16-byte aligned, unsure about D3D12 }; struct d3d12_res { diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index 2d31f1a1f..bd0587854 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -20,7 +20,7 @@ cbuffer VS_CONSTANT_BUFFER : register(b0) { uint format; uint colorspace; uint rotation; - uint __pad[1]; + bool ycbcr_full_range; }; SamplerState ss { @@ -78,8 +78,6 @@ inline float4 yuv_to_rgba_rec2020(float y, float u, float v, bool full_range) static const float SDR_MAX_NITS = 80.0f; -static const bool YCBCR_FULL_RANGE = true; - inline void gaussian(uint type, float w, float h, inout float2 uv) { float2 res = float2(w, h); @@ -140,7 +138,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET float v = tex1.Sample(ss, uv).g; if (format == 10 && colorspace == 3) { - rgba = yuv_to_rgba_rec2020(y, u, v, YCBCR_FULL_RANGE); + rgba = yuv_to_rgba_rec2020(y, u, v, ycbcr_full_range); } else { rgba = yuv_to_rgba(y, u, v); @@ -162,7 +160,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET } if ((format == 11 || format == 12) && colorspace == 3) { - rgba = yuv_to_rgba_rec2020(y, u, v, YCBCR_FULL_RANGE); + rgba = yuv_to_rgba_rec2020(y, u, v, ycbcr_full_range); } else { rgba = yuv_to_rgba(y, u, v); @@ -176,7 +174,7 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (format == 9 && colorspace == 3) { // Note the reordering of yuv components here...RGBA maps to UYVA according to https://docs.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format - rgba = yuv_to_rgba_rec2020(u, y, v, YCBCR_FULL_RANGE); + rgba = yuv_to_rgba_rec2020(u, y, v, ycbcr_full_range); } else { rgba = yuv_to_rgba(y, u, v);