diff --git a/makefile b/makefile index ddbafdf60..024024acd 100644 --- a/makefile +++ b/makefile @@ -96,6 +96,10 @@ DEFS = \ -DUNICODE \ -DWIN32_LEAN_AND_MEAN +!IFDEF D3D11_CTX_DEBUG +DEFS = $(DEFS) -DD3D11_CTX_DEBUG +!ENDIF + FXCFLAGS = \ /O3 \ /Ges \ diff --git a/src/app.c b/src/app.c index 87ce6c30b..6ddf8ec4d 100644 --- a/src/app.c +++ b/src/app.c @@ -129,6 +129,14 @@ bool MTY_WindowSetGFX(MTY_App *app, MTY_Window window, MTY_GFX api, bool vsync) return gfx_ctx ? true : false; } +bool MTY_WindowIsHDRSupported(MTY_App *app, MTY_Window window) +{ + struct gfx_ctx *gfx_ctx = NULL; + MTY_GFX api = mty_window_get_gfx(app, window, &gfx_ctx); + + return api != MTY_GFX_NONE && GFX_CTX_API[api].hdr_supported(gfx_ctx); +} + // Event utility diff --git a/src/gfx/mod-ctx.h b/src/gfx/mod-ctx.h index c5b1865ba..3048fce74 100644 --- a/src/gfx/mod-ctx.h +++ b/src/gfx/mod-ctx.h @@ -31,7 +31,8 @@ struct gfx_ctx; bool wrap(api, set_ui_texture)(struct gfx_ctx *gfx_ctx, uint32_t id, const void *rgba, \ uint32_t width, uint32_t height); \ bool wrap(api, has_ui_texture)(struct gfx_ctx *gfx_ctx, uint32_t id); \ - bool wrap(api, make_current)(struct gfx_ctx *gfx_ctx, bool current); + bool wrap(api, make_current)(struct gfx_ctx *gfx_ctx, bool current); \ + bool wrap(api, hdr_supported)(struct gfx_ctx *gfx_ctx); #define GFX_CTX_PROTOTYPES(api) \ GFX_CTX_DECLARE_API(api, GFX_CTX_PROTO) @@ -49,4 +50,5 @@ struct gfx_ctx; mty##api##ctx_set_ui_texture, \ mty##api##ctx_has_ui_texture, \ mty##api##ctx_make_current, \ + mty##api##ctx_hdr_supported, \ }, diff --git a/src/matoya.h b/src/matoya.h index 7c763967d..4c1e955cb 100644 --- a/src/matoya.h +++ b/src/matoya.h @@ -58,21 +58,37 @@ typedef enum { /// @brief Raw image color formats. typedef enum { - MTY_COLOR_FORMAT_UNKNOWN = 0, ///< Unknown color format. - MTY_COLOR_FORMAT_BGRA = 1, ///< 8-bits per channel BGRA. - MTY_COLOR_FORMAT_NV12 = 2, ///< 4:2:0 full W/H Y plane followed by an interleaved half - ///< W/H UV plane. 
- MTY_COLOR_FORMAT_I420 = 3, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane - ///< followed by a half W/H V plane. - MTY_COLOR_FORMAT_I444 = 4, ///< 4:4:4 full W/H consecutive Y, U, V planes. - MTY_COLOR_FORMAT_NV16 = 5, ///< 4:2:2 full W/H Y plane followed by an interleaved half W - ///< full H UV plane. - MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. - MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. - MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. - MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, + MTY_COLOR_FORMAT_UNKNOWN = 0, ///< Unknown color format. + MTY_COLOR_FORMAT_BGRA = 1, ///< 8-bits per channel BGRA. + MTY_COLOR_FORMAT_NV12 = 2, ///< 4:2:0 full W/H Y plane followed by an interleaved half + ///< W/H UV plane. + MTY_COLOR_FORMAT_I420 = 3, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane + ///< followed by a half W/H V plane. + MTY_COLOR_FORMAT_I444 = 4, ///< 4:4:4 full W/H consecutive Y, U, V planes. + MTY_COLOR_FORMAT_NV16 = 5, ///< 4:2:2 full W/H Y plane followed by an interleaved half W + ///< full H UV plane. + MTY_COLOR_FORMAT_BGR565 = 6, ///< 5-bits blue, 6-bits green, 5-bits red. + MTY_COLOR_FORMAT_BGRA5551 = 7, ///< 5-bits per BGR channels, 1-bit alpha. + MTY_COLOR_FORMAT_AYUV = 8, ///< 4:4:4 full W/H interleaved Y, U, V. + MTY_COLOR_FORMAT_Y410 = 9, ///< 4:4:4 full W/H interleaved Y, U, V. 10-bit YUV, 2-bit alpha. + MTY_COLOR_FORMAT_P016 = 10, ///< 4:2:0 full W/H Y plane followed by a half W/H U plane + ///< followed by a half W/H V plane. Supports 10-bit to 16-bit data. + MTY_COLOR_FORMAT_I444_10 = 11, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 10-bit data. + MTY_COLOR_FORMAT_I444_16 = 12, ///< 4:4:4 full W/H consecutive Y, U, V planes. Supports 16-bit data. + MTY_COLOR_FORMAT_RGB10A2 = 13, ///< 10-bits per RGB channels, 2-bit alpha. + MTY_COLOR_FORMAT_RGBA16F = 14, ///< 16-bits floating-point precision per channel RGBA. 
+ MTY_COLOR_FORMAT_MAKE_32 = INT32_MAX, } MTY_ColorFormat; +/// @brief Defines the color encoding of the raw image. Note that certain color spaces and color formats are tightly coupled with each other. +typedef enum { + MTY_COLOR_SPACE_UNKNOWN = 0, ///< Unknown color space. + MTY_COLOR_SPACE_SRGB = 1, ///< sRGB/rec709 primaries and a non-linear transfer function (approx gamma curve of 2.2). Supported by all color formats. + MTY_COLOR_SPACE_SCRGB_LINEAR = 2, ///< Microsoft's scRGB wide gamut color space which is based on sRGB/rec709 primaries and has a linear transfer function. Only supported by color format MTY_COLOR_FORMAT_RGBA16F. + MTY_COLOR_SPACE_HDR10 = 3, ///< Uses the rec2020 color primaries and the rec2100 non-linear transfer function (ST 2084 perceptual quantizer, aka PQ). Only supported by color format MTY_COLOR_FORMAT_RGB10A2. + MTY_COLOR_SPACE_MAKE_32 = INT32_MAX, +} MTY_ColorSpace; + /// @brief Quad texture filtering. typedef enum { MTY_FILTER_NEAREST = 0, ///< Nearest neighbor filter by the GPU, can cause shimmering. @@ -99,23 +115,39 @@ typedef enum { MTY_ROTATION_MAKE_32 = INT32_MAX, } MTY_Rotation; +/// @brief HDR metadata associated with an image being rendered. +typedef struct { + float color_primary_red[2]; ///< xy coordinates for the red primary of the image's color space according to the CIE 1931 color space chromaticity diagram. + float color_primary_green[2]; ///< xy coordinates for the green primary of the image's color space according to the CIE 1931 color space chromaticity diagram. + float color_primary_blue[2]; ///< xy coordinates for the blue primary of the image's color space according to the CIE 1931 color space chromaticity diagram. + float white_point[2]; ///< xy coordinates for the white point of the image's color space according to the CIE 1931 color space chromaticity diagram. + float min_luminance; ///< Min luminance supported by the display that is the source of the image. 
+ float max_luminance; ///< Max luminance supported by the display that is the source of the image. + float max_content_light_level; ///< MaxCLL. This is the nit value of the brightest possible pixel that could ever occur in an image. If unknown, you can set it to max_luminance. + float max_frame_average_light_level; ///< MaxFALL. This is the highest nit value that an image's average luminance is expected to have. If unknown, you can set it to MaxCLL. +} MTY_HDRDesc; + /// @brief Description of a render operation. typedef struct { - MTY_ColorFormat format; ///< The color format of a raw image. - MTY_Rotation rotation; ///< Rotation applied to the image. - MTY_Filter filter; ///< Filter applied to the image. - MTY_Effect effect; ///< Effect applied to the image. - uint32_t imageWidth; ///< The width in pixels of the image. - uint32_t imageHeight; ///< The height in pixels of the image. - uint32_t cropWidth; ///< Desired crop width of the image from the top left corner. - uint32_t cropHeight; ///< Desired crop height of the image from the top left corner. - uint32_t viewWidth; ///< The width of the viewport. - uint32_t viewHeight; ///< The height of the viewport. - float aspectRatio; ///< Desired aspect ratio of the image. The renderer will letterbox - ///< the image to maintain the specified aspect ratio. - float scale; ///< Multiplier applied to the dimensions of the image, producing an - ///< minimized or magnified image. This can be set to 0 - ///< if unnecessary. + MTY_ColorFormat format; ///< The color format of a raw image. + MTY_ColorSpace colorspace; ///< Defines the color encoding of the image. + MTY_Rotation rotation; ///< Rotation applied to the image. + MTY_Filter filter; ///< Filter applied to the image. + MTY_Effect effect; ///< Effect applied to the image. + uint32_t imageWidth; ///< The width in pixels of the image. + uint32_t imageHeight; ///< The height in pixels of the image. 
+ uint32_t cropWidth; ///< Desired crop width of the image from the top left corner. + uint32_t cropHeight; ///< Desired crop height of the image from the top left corner. + uint32_t viewWidth; ///< The width of the viewport. + uint32_t viewHeight; ///< The height of the viewport. + float aspectRatio; ///< Desired aspect ratio of the image. The renderer will letterbox + ///< the image to maintain the specified aspect ratio. + float scale; ///< Multiplier applied to the dimensions of the image, producing an + ///< minimized or magnified image. This can be set to 0 + ///< if unnecessary. + bool fullRange; ///< If true, then the image components comprise the range [0, 1]; else, they comprise the "partial" or "legal" range based on the format of the image. + bool hdrDescSpecified; ///< Is HDR metadata provided. Only relevant if format + colorspace indicate an HDR image. + MTY_HDRDesc hdrDesc; ///< HDR metadata for the image. Only relevant if format + colorspace indicate an HDR image. } MTY_RenderDesc; /// @brief A point with an `x` and `y` coordinate. @@ -170,6 +202,7 @@ typedef struct { uint32_t idxTotalLength; ///< Total number of indices in all command lists. uint32_t vtxTotalLength; ///< Total number of vertices in all command lists. bool clear; ///< Surface should be cleared before drawing. + bool hdr; ///< UI in SDR will be composited on top of an HDR quad. } MTY_DrawData; /// @brief Create an MTY_Renderer capable of executing drawing commands. @@ -1257,6 +1290,13 @@ MTY_WindowSetGFX(MTY_App *app, MTY_Window window, MTY_GFX api, bool vsync); MTY_EXPORT MTY_ContextState MTY_WindowGetContextState(MTY_App *app, MTY_Window window); +/// @brief Tells you whether the window can show HDR content or not. +/// @param app The MTY_App. +/// @param window An MTY_Window. +/// @returns Returns true if the window can render HDR content, false otherwise. 
+MTY_EXPORT bool +MTY_WindowIsHDRSupported(MTY_App *app, MTY_Window window); + /// @brief Get the string representation of a key combination. /// @details This function attempts to use the current locale. /// @param mod Combo modifier. diff --git a/src/unix/apple/macosx/gfx/gl-ctx.m b/src/unix/apple/macosx/gfx/gl-ctx.m index 9551a6b21..40fd9aa0f 100644 --- a/src/unix/apple/macosx/gfx/gl-ctx.m +++ b/src/unix/apple/macosx/gfx/gl-ctx.m @@ -190,3 +190,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return true; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/apple/macosx/gfx/metal-ctx.m b/src/unix/apple/macosx/gfx/metal-ctx.m index ccdff6e26..7478edc83 100644 --- a/src/unix/apple/macosx/gfx/metal-ctx.m +++ b/src/unix/apple/macosx/gfx/metal-ctx.m @@ -183,3 +183,8 @@ bool mty_metal_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_metal_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/linux/android/gfx/gl-ctx.c b/src/unix/linux/android/gfx/gl-ctx.c index 2dd4a5e2d..ea9454125 100644 --- a/src/unix/linux/android/gfx/gl-ctx.c +++ b/src/unix/linux/android/gfx/gl-ctx.c @@ -347,3 +347,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return r; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/linux/generic/gfx/gl-ctx.c b/src/unix/linux/generic/gfx/gl-ctx.c index 267cc2093..66849a181 100644 --- a/src/unix/linux/generic/gfx/gl-ctx.c +++ b/src/unix/linux/generic/gfx/gl-ctx.c @@ -163,3 +163,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return r; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/unix/web/gfx/gl-ctx.c b/src/unix/web/gfx/gl-ctx.c index 0f5c8327e..0897c012c 100644 --- a/src/unix/web/gfx/gl-ctx.c +++ b/src/unix/web/gfx/gl-ctx.c @@ -101,3 +101,8 @@ bool 
mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/windows/gfx/d3d11-ctx.c b/src/windows/gfx/d3d11-ctx.c index b3aea31d7..006ecca6c 100644 --- a/src/windows/gfx/d3d11-ctx.c +++ b/src/windows/gfx/d3d11-ctx.c @@ -9,7 +9,7 @@ GFX_CTX_PROTOTYPES(_d3d11_) #define COBJMACROS #include -#include +#include #define DXGI_FATAL(e) ( \ (e) == DXGI_ERROR_DEVICE_REMOVED || \ @@ -23,17 +23,77 @@ GFX_CTX_PROTOTYPES(_d3d11_) ) #define D3D11_CTX_WAIT 2000 +#ifndef D3D11_CTX_DEBUG + #define D3D11_CTX_DEBUG false +#endif + + +static const MTY_ColorSpace MTY_FORMAT_TO_EXPECTED_MTY_COLORSPACE[] = { + [MTY_COLOR_FORMAT_UNKNOWN] = MTY_COLOR_SPACE_UNKNOWN, + [MTY_COLOR_FORMAT_BGRA] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_NV12] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_I420] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_I444] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_NV16] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_BGR565] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_BGRA5551] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_AYUV] = MTY_COLOR_SPACE_SRGB, + [MTY_COLOR_FORMAT_Y410] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_P016] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_I444_10] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_I444_16] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_RGB10A2] = MTY_COLOR_SPACE_HDR10, + [MTY_COLOR_FORMAT_RGBA16F] = MTY_COLOR_SPACE_SCRGB_LINEAR, +}; + +static const DXGI_FORMAT MTY_FORMAT_TO_DXGI_FORMAT[] = { + [MTY_COLOR_FORMAT_UNKNOWN] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_BGRA] = DXGI_FORMAT_B8G8R8A8_UNORM, + [MTY_COLOR_FORMAT_NV12] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I420] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I444] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_NV16] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_BGR565] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_BGRA5551] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_AYUV] = 
DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_Y410] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_P016] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I444_10] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_I444_16] = DXGI_FORMAT_UNKNOWN, + [MTY_COLOR_FORMAT_RGB10A2] = DXGI_FORMAT_R10G10B10A2_UNORM, + [MTY_COLOR_FORMAT_RGBA16F] = DXGI_FORMAT_R16G16B16A16_FLOAT, +}; + +static const DXGI_COLOR_SPACE_TYPE MTY_COLORSPACE_TO_DXGI_COLORSPACE[] = { + [MTY_COLOR_SPACE_UNKNOWN] = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, + [MTY_COLOR_SPACE_SRGB] = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, + [MTY_COLOR_SPACE_SCRGB_LINEAR] = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, + [MTY_COLOR_SPACE_HDR10] = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, +}; struct d3d11_ctx { HWND hwnd; uint32_t width; uint32_t height; MTY_Renderer *renderer; + DXGI_FORMAT format; + DXGI_FORMAT format_new; + DXGI_COLOR_SPACE_TYPE colorspace; + DXGI_COLOR_SPACE_TYPE colorspace_new; ID3D11Device *device; ID3D11DeviceContext *context; ID3D11Texture2D *back_buffer; IDXGISwapChain2 *swap_chain2; + IDXGISwapChain3 *swap_chain3; + IDXGISwapChain4 *swap_chain4; + IDXGIFactory1 *factory1; HANDLE waitable; + bool hdr_init; + bool hdr_supported; + bool hdr; + bool composite_ui; + MTY_HDRDesc hdr_desc; + RECT window_bounds; }; static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t *height) @@ -45,8 +105,153 @@ static void d3d11_ctx_get_size(struct d3d11_ctx *ctx, uint32_t *width, uint32_t *height = rect.bottom - rect.top; } +static void mty_validate_format_colorspace(struct d3d11_ctx *ctx, MTY_ColorFormat format, MTY_ColorSpace colorspace, DXGI_FORMAT *format_out, DXGI_COLOR_SPACE_TYPE *colorspace_out) +{ + // Default to last known values + DXGI_FORMAT format_new = ctx->format; + DXGI_COLOR_SPACE_TYPE colorspace_new = ctx->colorspace; + + if (format != MTY_COLOR_FORMAT_UNKNOWN && colorspace != MTY_COLOR_SPACE_UNKNOWN) { + // Align the color space to the given format + MTY_ColorSpace colorspace_expected = 
MTY_FORMAT_TO_EXPECTED_MTY_COLORSPACE[format]; + if (colorspace_expected != colorspace) { + MTY_Log("Expected MTY colorspace 0x%X for MTY format 0x%X but found 0x%X. Forcing colorspace to 0x%X.", colorspace_expected, format, colorspace, colorspace_expected); + colorspace = colorspace_expected; + } + + format_new = MTY_FORMAT_TO_DXGI_FORMAT[format]; + colorspace_new = MTY_COLORSPACE_TO_DXGI_COLORSPACE[colorspace]; + } + + *format_out = format_new; + *colorspace_out = colorspace_new; +} + +static bool d3d11_ctx_refresh_window_bounds(struct d3d11_ctx *ctx) +{ + bool changed = false; + + RECT window_bounds_new = {0}; + GetWindowRect(ctx->hwnd, &window_bounds_new); + + const LONG dt_left = window_bounds_new.left - ctx->window_bounds.left; + const LONG dt_top = window_bounds_new.top - ctx->window_bounds.top; + const LONG dt_right = window_bounds_new.right - ctx->window_bounds.right; + const LONG dt_bottom = window_bounds_new.bottom - ctx->window_bounds.bottom; + + changed = dt_left || dt_top || dt_right || dt_bottom; + + ctx->window_bounds = window_bounds_new; + + return changed; +} + +static bool d3d11_ctx_query_hdr_support(struct d3d11_ctx *ctx) +{ + bool r = false; + + // Courtesy of MSDN https://docs.microsoft.com/en-us/windows/win32/direct3darticles/high-dynamic-range + + // Iterate through the DXGI outputs associated with the DXGI adapter, + // and find the output whose bounds have the greatest overlap with the + // app window (i.e. the output for which the intersection area is the + // greatest). 
+ + // Must create the factory afresh each time, otherwise you'll get a stale value at the end + if (ctx->factory1) { + IDXGIFactory1_Release(ctx->factory1); + ctx->factory1 = NULL; + } + HRESULT e = CreateDXGIFactory1(&IID_IDXGIFactory1, &ctx->factory1); + if (e != S_OK) { + MTY_Log("'CreateDXGIFactory1' failed with HRESULT 0x%X", e); + return r; + } + + // Get the rectangle bounds of the app window + const LONG ax1 = ctx->window_bounds.left; + const LONG ay1 = ctx->window_bounds.top; + const LONG ax2 = ctx->window_bounds.right; + const LONG ay2 = ctx->window_bounds.bottom; + + // Go through the outputs of each and every adapter + IDXGIOutput *current_output = NULL; + IDXGIOutput *best_output = NULL; + LONG best_intersect_area = -1; + IDXGIAdapter1 *adapter1 = NULL; + for (UINT j = 0; IDXGIFactory1_EnumAdapters1(ctx->factory1, j, &adapter1) != DXGI_ERROR_NOT_FOUND; j++) { + for (UINT i = 0; IDXGIAdapter1_EnumOutputs(adapter1, i, &current_output) != DXGI_ERROR_NOT_FOUND; i++) { + // Get the rectangle bounds of current output + DXGI_OUTPUT_DESC desc = {0}; + e = IDXGIOutput_GetDesc(current_output, &desc); + if (e != S_OK) { + MTY_Log("'IDXGIOutput_GetDesc' failed with HRESULT 0x%X", e); + } else { + const RECT output_bounds = desc.DesktopCoordinates; + const LONG bx1 = output_bounds.left; + const LONG by1 = output_bounds.top; + const LONG bx2 = output_bounds.right; + const LONG by2 = output_bounds.bottom; + + // Compute the intersection and see if it's the best fit + // Courtesy of https://github.com/microsoft/DirectX-Graphics-Samples/blob/c79f839da1bb2db77d2306be5e4e664a5d23a36b/Samples/Desktop/D3D12HDR/src/D3D12HDR.cpp#L1046 + const LONG intersect_area = max(0, min(ax2, bx2) - max(ax1, bx1)) * max(0, min(ay2, by2) - max(ay1, by1)); + if (intersect_area > best_intersect_area) { + if (best_output != NULL) + IDXGIOutput_Release(best_output); + + best_output = current_output; + best_intersect_area = intersect_area; + + } else { + IDXGIOutput_Release(current_output); + } + } + 
} + + IDXGIAdapter1_Release(adapter1); + } + + // Having determined the output (display) upon which the app is primarily being + // rendered, retrieve the HDR capabilities of that display by checking the color space. + IDXGIOutput6 *output6 = NULL; + e = IDXGIOutput_QueryInterface(best_output, &IID_IDXGIOutput6, &output6); + if (e != S_OK) { + MTY_Log("'IDXGIOutput_QueryInterface' failed with HRESULT 0x%X", e); + } else { + DXGI_OUTPUT_DESC1 desc1 = {0}; + e = IDXGIOutput6_GetDesc1(output6, &desc1); + if (e != S_OK) { + MTY_Log("'IDXGIOutput6_GetDesc1' failed with HRESULT 0x%X", e); + + } else { + r = desc1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; // this is the canonical check according to MSDN and NVIDIA + } + + IDXGIOutput6_Release(output6); + } + + IDXGIOutput_Release(best_output); + + return r; +} + +static void d3d11_ctx_free_hdr(struct d3d11_ctx *ctx) +{ + if (ctx->swap_chain4) + IDXGISwapChain4_Release(ctx->swap_chain4); + + if (ctx->swap_chain3) + IDXGISwapChain3_Release(ctx->swap_chain3); + + ctx->swap_chain4 = NULL; + ctx->swap_chain3 = NULL; +} + static void d3d11_ctx_free(struct d3d11_ctx *ctx) { + d3d11_ctx_free_hdr(ctx); + if (ctx->back_buffer) ID3D11Texture2D_Release(ctx->back_buffer); @@ -56,6 +261,9 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) if (ctx->swap_chain2) IDXGISwapChain2_Release(ctx->swap_chain2); + if (ctx->factory1) + IDXGIFactory1_Release(ctx->factory1); + if (ctx->context) ID3D11DeviceContext_Release(ctx->context); @@ -65,6 +273,7 @@ static void d3d11_ctx_free(struct d3d11_ctx *ctx) ctx->back_buffer = NULL; ctx->waitable = NULL; ctx->swap_chain2 = NULL; + ctx->factory1 = NULL; ctx->context = NULL; ctx->device = NULL; } @@ -77,6 +286,9 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) IDXGIFactory2 *factory2 = NULL; IDXGISwapChain1 *swap_chain1 = NULL; + ctx->format = DXGI_FORMAT_B8G8R8A8_UNORM; + ctx->colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + DXGI_SWAP_CHAIN_DESC1 sd = {0}; sd.Format = 
DXGI_FORMAT_B8G8R8A8_UNORM; sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; @@ -84,9 +296,12 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) sd.SampleDesc.Count = 1; sd.BufferCount = 2; sd.Flags = D3D11_SWFLAGS; - + D3D_FEATURE_LEVEL levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; - HRESULT e = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 0, levels, + UINT flags = 0; + if (D3D11_CTX_DEBUG) + flags |= D3D11_CREATE_DEVICE_DEBUG; + HRESULT e = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, levels, sizeof(levels) / sizeof(D3D_FEATURE_LEVEL), D3D11_SDK_VERSION, &ctx->device, NULL, &ctx->context); if (e != S_OK) { MTY_Log("'D3D11CreateDevice' failed with HRESULT 0x%X", e); @@ -117,6 +332,12 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) goto except; } + e = IDXGIFactory2_QueryInterface(factory2, &IID_IDXGIFactory1, &ctx->factory1); + if (e != S_OK) { + MTY_Log("'IDXGIFactory2_QueryInterface' failed with HRESULT 0x%X", e); + goto except; + } + e = IDXGIFactory2_CreateSwapChainForHwnd(factory2, unknown, ctx->hwnd, &sd, NULL, NULL, &swap_chain1); if (e != S_OK) { MTY_Log("'IDXGIFactory2_CreateSwapChainForHwnd' failed with HRESULT 0x%X", e); @@ -153,6 +374,29 @@ static bool d3d11_ctx_init(struct d3d11_ctx *ctx) if (we != WAIT_OBJECT_0) MTY_Log("'WaitForSingleObjectEx' failed with error 0x%X", we); + // HDR init + + HRESULT e_hdr = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain3, &ctx->swap_chain3); + if (e_hdr != S_OK) { + MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e_hdr); + goto except_hdr; + } + + e_hdr = IDXGISwapChain1_QueryInterface(swap_chain1, &IID_IDXGISwapChain4, &ctx->swap_chain4); + if (e_hdr != S_OK) { + MTY_Log("'IDXGISwapChain1_QueryInterface' failed with HRESULT 0x%X", e_hdr); + goto except_hdr; + } + + ctx->hdr_init = true; + d3d11_ctx_refresh_window_bounds(ctx); + ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + + except_hdr: + + if (e_hdr != S_OK) + 
d3d11_ctx_free_hdr(ctx); + except: if (swap_chain1) @@ -226,6 +470,7 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) d3d11_ctx_get_size(ctx, &width, &height); if (ctx->width != width || ctx->height != height) { + // DXGI_FORMAT_UNKNOWN will resize without changing the existing format HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, DXGI_FORMAT_UNKNOWN, D3D11_SWFLAGS); @@ -240,6 +485,56 @@ static void d3d11_ctx_refresh(struct d3d11_ctx *ctx) d3d11_ctx_init(ctx); } } + + bool hdr = ctx->format_new == DXGI_FORMAT_R16G16B16A16_FLOAT || ctx->colorspace_new == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; + + if (ctx->hdr != hdr) { + // If in HDR mode, we keep swap chain in HDR10 (rec2020 10-bit RGB + ST2084 PQ); otherwise in SDR mode, it's the standard BGRA8 sRGB + DXGI_FORMAT format = hdr ? DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_B8G8R8A8_UNORM; + DXGI_COLOR_SPACE_TYPE colorspace = hdr ? DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + + HRESULT e = IDXGISwapChain2_ResizeBuffers(ctx->swap_chain2, 0, 0, 0, format, D3D11_SWFLAGS); + if (e == S_OK) { + e = IDXGISwapChain3_SetColorSpace1(ctx->swap_chain3, colorspace); + + if (e == S_OK) { + ctx->hdr = hdr; + ctx->format = ctx->format_new; + ctx->colorspace = ctx->colorspace_new; + + } else if (DXGI_FATAL(e)) { + MTY_Log("'IDXGISwapChain3_SetColorSpace1' failed with HRESULT 0x%X", e); + d3d11_ctx_free(ctx); + d3d11_ctx_init(ctx); + } + + } else if (DXGI_FATAL(e)) { + MTY_Log("'IDXGISwapChain2_ResizeBuffers' failed with HRESULT 0x%X", e); + d3d11_ctx_free(ctx); + d3d11_ctx_init(ctx); + } + } + + if (ctx->hdr) { + // Update to the latest known HDR metadata + DXGI_HDR_METADATA_HDR10 hdr_desc = {0}; + hdr_desc.RedPrimary[0] = (UINT16) (ctx->hdr_desc.color_primary_red[0] * 50000); // primaries and white point are normalized to 50000 + hdr_desc.RedPrimary[1] = (UINT16) (ctx->hdr_desc.color_primary_red[1] * 50000); + hdr_desc.GreenPrimary[0] = (UINT16) 
(ctx->hdr_desc.color_primary_green[0] * 50000); + hdr_desc.GreenPrimary[1] = (UINT16) (ctx->hdr_desc.color_primary_green[1] * 50000); + hdr_desc.BluePrimary[0] = (UINT16) (ctx->hdr_desc.color_primary_blue[0] * 50000); + hdr_desc.BluePrimary[1] = (UINT16) (ctx->hdr_desc.color_primary_blue[1] * 50000); + hdr_desc.WhitePoint[0] = (UINT16) (ctx->hdr_desc.white_point[0] * 50000); + hdr_desc.WhitePoint[1] = (UINT16) (ctx->hdr_desc.white_point[1] * 50000); + hdr_desc.MinMasteringLuminance = (UINT) (ctx->hdr_desc.min_luminance * 10000); // MinMasteringLuminance is specified as 1/10000th of a nit + hdr_desc.MaxMasteringLuminance = (UINT) ctx->hdr_desc.max_luminance; + hdr_desc.MaxContentLightLevel = (UINT16) ctx->hdr_desc.max_content_light_level; + hdr_desc.MaxFrameAverageLightLevel = (UINT16) ctx->hdr_desc.max_frame_average_light_level; + + HRESULT e = IDXGISwapChain4_SetHDRMetaData(ctx->swap_chain4, DXGI_HDR_METADATA_TYPE_HDR10, sizeof(hdr_desc), &hdr_desc); + if (e != S_OK) + MTY_Log("Unable to set HDR metadata: 'IDXGISwapChain4_SetHDRMetaData' failed with HRESULT 0x%X", e); + } } MTY_Surface *mty_d3d11_ctx_get_surface(struct gfx_ctx *gfx_ctx) @@ -272,6 +567,8 @@ void mty_d3d11_ctx_present(struct gfx_ctx *gfx_ctx, uint32_t interval) ID3D11Texture2D_Release(ctx->back_buffer); ctx->back_buffer = NULL; + + ctx->composite_ui = false; + if (DXGI_FATAL(e)) { MTY_Log("'IDXGISwapChain2_Present' failed with HRESULT 0x%X", e); d3d11_ctx_free(ctx); @@ -289,6 +586,15 @@ void mty_d3d11_ctx_draw_quad(struct gfx_ctx *gfx_ctx, const void *image, const M { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_COLOR_SPACE_TYPE colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + mty_validate_format_colorspace(ctx, desc->format, desc->colorspace, &format, &colorspace); + ctx->format_new = format; + ctx->colorspace_new = colorspace; + + if (desc->hdrDescSpecified) + ctx->hdr_desc = desc->hdrDesc; + mty_d3d11_ctx_get_surface(gfx_ctx); 
if (ctx->back_buffer) { @@ -298,6 +604,8 @@ void mty_d3d11_ctx_draw_quad(struct gfx_ctx *gfx_ctx, const void *image, const M MTY_RendererDrawQuad(ctx->renderer, MTY_GFX_D3D11, (MTY_Device *) ctx->device, (MTY_Context *) ctx->context, image, &mutated, (MTY_Surface *) ctx->back_buffer); + + ctx->composite_ui = ctx->hdr; } } @@ -305,11 +613,17 @@ void mty_d3d11_ctx_draw_ui(struct gfx_ctx *gfx_ctx, const MTY_DrawData *dd) { struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + MTY_DrawData dd_mutated = *dd; + dd_mutated.hdr = ctx->composite_ui; + + ctx->format_new = dd_mutated.hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_B8G8R8A8_UNORM; + ctx->colorspace_new = dd_mutated.hdr ? DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709 : DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + mty_d3d11_ctx_get_surface(gfx_ctx); if (ctx->back_buffer) MTY_RendererDrawUI(ctx->renderer, MTY_GFX_D3D11, (MTY_Device *) ctx->device, - (MTY_Context *) ctx->context, dd, (MTY_Surface *) ctx->back_buffer); + (MTY_Context *) ctx->context, &dd_mutated, (MTY_Surface *) ctx->back_buffer); } bool mty_d3d11_ctx_set_ui_texture(struct gfx_ctx *gfx_ctx, uint32_t id, const void *rgba, @@ -332,3 +646,21 @@ bool mty_d3d11_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_d3d11_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + struct d3d11_ctx *ctx = (struct d3d11_ctx *) gfx_ctx; + + if (!ctx->hdr_init) { + ctx->hdr_supported = false; + + } else { + const bool adapter_reset = !ctx->factory1 || !IDXGIFactory1_IsCurrent(ctx->factory1); + const bool window_moved = d3d11_ctx_refresh_window_bounds(ctx); // includes when moved to different display + if (window_moved || adapter_reset) { + ctx->hdr_supported = d3d11_ctx_query_hdr_support(ctx); + } + } + + return ctx->hdr_supported; +} diff --git a/src/windows/gfx/d3d11-ui.c b/src/windows/gfx/d3d11-ui.c index 0e9f89bf1..6de127455 100644 --- a/src/windows/gfx/d3d11-ui.c +++ b/src/windows/gfx/d3d11-ui.c @@ -32,6 +32,8 @@ struct d3d11_ui { 
ID3D11InputLayout *il; ID3D11Buffer *cb; ID3D11Resource *cb_res; + ID3D11Buffer *cbps; + ID3D11Resource *cbps_res; ID3D11PixelShader *ps; ID3D11SamplerState *sampler; ID3D11RasterizerState *rs; @@ -43,6 +45,12 @@ struct d3d11_ui_cb { float proj[4][4]; }; +struct d3d11_ui_cbps { + uint32_t hdr; + float hdr_brighten_factor; + float __pad[2]; // must align to 16 bytes +}; + struct gfx_ui *mty_d3d11_ui_create(MTY_Device *device) { struct d3d11_ui *ctx = MTY_Alloc(1, sizeof(struct d3d11_ui)); @@ -91,6 +99,24 @@ struct gfx_ui *mty_d3d11_ui_create(MTY_Device *device) goto except; } + // Pre create a constant buffer used for storing the pixel shader data + D3D11_BUFFER_DESC desc_cbps = {0}; + desc_cbps.ByteWidth = sizeof(struct d3d11_ui_cbps); + desc_cbps.Usage = D3D11_USAGE_DYNAMIC; + desc_cbps.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc_cbps.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + e = ID3D11Device_CreateBuffer(_device, &desc_cbps, NULL, &ctx->cbps); + if (e != S_OK) { + MTY_Log("'ID3D11Device_CreateBuffer' failed with HRESULT 0x%X", e); + goto except; + } + + e = ID3D11Buffer_QueryInterface(ctx->cbps, &IID_ID3D11Resource, &ctx->cbps_res); + if (e != S_OK) { + MTY_Log("'ID3D11Buffer_QueryInterface' failed with HRESULT 0x%X", e); + goto except; + } + // Blend state D3D11_BLEND_DESC bdesc = {0}; bdesc.AlphaToCoverageEnable = false; @@ -277,6 +303,19 @@ bool mty_d3d11_ui_render(struct gfx_ui *gfx_ui, MTY_Device *device, MTY_Context memcpy(&cb->proj, proj, sizeof(proj)); ID3D11DeviceContext_Unmap(_context, ctx->cb_res, 0); + // Update pixel shader constant buffer data + struct d3d11_ui_cbps cbps = {0}; + cbps.hdr = (uint32_t) dd->hdr; + cbps.hdr_brighten_factor = 3.75f; // XXX: this is something that we should allow the user to configure via client settings + + D3D11_MAPPED_SUBRESOURCE cbps_map = {0}; + e = ID3D11DeviceContext_Map(_context, ctx->cbps_res, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbps_map); + if (e != S_OK) + goto except; + + memcpy(cbps_map.pData, &cbps, 
sizeof(struct d3d11_ui_cbps)); + ID3D11DeviceContext_Unmap(_context, ctx->cbps_res, 0); + // Set render target (wraps the texture) if (_dest) { e = ID3D11Texture2D_QueryInterface(_dest, &IID_ID3D11Resource, &tex_res); @@ -317,6 +356,7 @@ bool mty_d3d11_ui_render(struct gfx_ui *gfx_ui, MTY_Device *device, MTY_Context ID3D11DeviceContext_VSSetShader(_context, ctx->vs, NULL, 0); ID3D11DeviceContext_VSSetConstantBuffers(_context, 0, 1, &ctx->cb); ID3D11DeviceContext_PSSetShader(_context, ctx->ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(_context, 0, 1, &ctx->cbps); ID3D11DeviceContext_PSSetSamplers(_context, 0, 1, &ctx->sampler); const float blend_factor[4] = {0.0f, 0.0f, 0.0f, 0.0f}; @@ -480,6 +520,12 @@ void mty_d3d11_ui_destroy(struct gfx_ui **gfx_ui) if (ctx->cb) ID3D11Buffer_Release(ctx->cb); + if (ctx->cbps_res) + ID3D11Resource_Release(ctx->cbps_res); + + if (ctx->cbps) + ID3D11Buffer_Release(ctx->cbps); + if (ctx->il) ID3D11InputLayout_Release(ctx->il); diff --git a/src/windows/gfx/d3d11.c b/src/windows/gfx/d3d11.c index 26bb771e8..fb38732e0 100644 --- a/src/windows/gfx/d3d11.c +++ b/src/windows/gfx/d3d11.c @@ -27,8 +27,10 @@ struct d3d11_psvars { uint32_t filter; uint32_t effect; uint32_t format; + uint32_t colorspace; uint32_t rotation; - uint32_t __pad[1]; // Constant buffers must be in increments of 16 bytes + uint32_t full_range; // 1 = full range video, 0 = limited/partial range video + uint32_t __pad[3]; // Constant buffers must be in increments of 16 bytes }; struct d3d11_res { @@ -272,17 +274,53 @@ static HRESULT d3d11_crop_copy(ID3D11DeviceContext *context, ID3D11Resource *tex return e; } +static void d3d11_get_dxgi_format_and_bpp(const MTY_ColorFormat format, DXGI_FORMAT *out_format, uint8_t *out_bpp) +{ + DXGI_FORMAT result = DXGI_FORMAT_B8G8R8A8_UNORM; + switch (format) { + case MTY_COLOR_FORMAT_BGR565: + result = DXGI_FORMAT_B5G6R5_UNORM; + break; + case MTY_COLOR_FORMAT_BGRA5551: + result = DXGI_FORMAT_B5G5R5A1_UNORM; + break; + case 
MTY_COLOR_FORMAT_Y410: // according to MSDN, the view format of Y410 is RGB10A2, just like how that of AYUV is BGRA8 + case MTY_COLOR_FORMAT_RGB10A2: + result = DXGI_FORMAT_R10G10B10A2_UNORM; + break; + case MTY_COLOR_FORMAT_RGBA16F: + result = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } + *out_format = result; + + uint8_t bpp = 2; + switch (format) { + case MTY_COLOR_FORMAT_BGRA: + case MTY_COLOR_FORMAT_RGB10A2: + case MTY_COLOR_FORMAT_AYUV: + case MTY_COLOR_FORMAT_Y410: + bpp = 4; break; + case MTY_COLOR_FORMAT_RGBA16F: + bpp = 8; break; + } + *out_bpp = bpp; +} + static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID3D11DeviceContext *context, const void *image, const MTY_RenderDesc *desc) { switch (desc->format) { case MTY_COLOR_FORMAT_BGRA: case MTY_COLOR_FORMAT_AYUV: + case MTY_COLOR_FORMAT_Y410: case MTY_COLOR_FORMAT_BGR565: - case MTY_COLOR_FORMAT_BGRA5551: { - DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_BGR565 ? DXGI_FORMAT_B5G6R5_UNORM : - desc->format == MTY_COLOR_FORMAT_BGRA5551 ? DXGI_FORMAT_B5G5R5A1_UNORM : DXGI_FORMAT_B8G8R8A8_UNORM; - uint8_t bpp = (desc->format == MTY_COLOR_FORMAT_BGRA || desc->format == MTY_COLOR_FORMAT_AYUV) ?
4 : 2; + case MTY_COLOR_FORMAT_BGRA5551: + case MTY_COLOR_FORMAT_RGB10A2: + case MTY_COLOR_FORMAT_RGBA16F: { + DXGI_FORMAT format = DXGI_FORMAT_B8G8R8A8_UNORM; + uint8_t bpp = 4; + d3d11_get_dxgi_format_and_bpp(desc->format, &format, &bpp); // BGRA HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format, desc->cropWidth, desc->cropHeight); @@ -292,47 +330,60 @@ static HRESULT d3d11_reload_textures(struct d3d11 *ctx, ID3D11Device *device, ID if (e != S_OK) return e; break; } - case MTY_COLOR_FORMAT_NV12: { + case MTY_COLOR_FORMAT_NV12: + case MTY_COLOR_FORMAT_P016: { + // See the following resources to understand memory layout of these formats: + // - https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#nv12 + // - https://docs.microsoft.com/en-us/windows/win32/medfound/10-bit-and-16-bit-yuv-video-formats#p016-and-p010 + const bool nv12 = desc->format == MTY_COLOR_FORMAT_NV12; + const uint8_t bpp = nv12 ? 1 : 2; + const DXGI_FORMAT format_y = nv12 ? DXGI_FORMAT_R8_UNORM : DXGI_FORMAT_R16_UNORM; + const DXGI_FORMAT format_uv = nv12 ? 
DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R16G16_UNORM; + // Y - HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth, desc->cropHeight); + HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format_y, desc->cropWidth, desc->cropHeight); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, 1); + e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, bpp); if (e != S_OK) return e; // UV - e = d3d11_refresh_resource(&ctx->staging[1], device, DXGI_FORMAT_R8G8_UNORM, desc->cropWidth / 2, desc->cropHeight / 2); + e = d3d11_refresh_resource(&ctx->staging[1], device, format_uv, desc->cropWidth / 2, desc->cropHeight / 2); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[1].resource, (uint8_t *) image + desc->imageWidth * desc->imageHeight, desc->cropWidth / 2, desc->cropHeight / 2, desc->imageWidth / 2, 2); + e = d3d11_crop_copy(context, ctx->staging[1].resource, (uint8_t *) image + desc->imageWidth * desc->imageHeight * bpp, desc->cropWidth / 2, desc->cropHeight / 2, desc->imageWidth / 2, bpp * 2); if (e != S_OK) return e; break; } case MTY_COLOR_FORMAT_I420: - case MTY_COLOR_FORMAT_I444: { - uint32_t div = desc->format == MTY_COLOR_FORMAT_I420 ? 2 : 1; + case MTY_COLOR_FORMAT_I444: + case MTY_COLOR_FORMAT_I444_10: + case MTY_COLOR_FORMAT_I444_16: { + const uint32_t div = desc->format == MTY_COLOR_FORMAT_I420 ? 2 : 1; + const DXGI_FORMAT format = desc->format == MTY_COLOR_FORMAT_I444_10 || desc->format == MTY_COLOR_FORMAT_I444_16 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM; + const uint8_t bpp = format == DXGI_FORMAT_R16_UNORM ? 
2 : 1; // Y - HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth, desc->cropHeight); + HRESULT e = d3d11_refresh_resource(&ctx->staging[0], device, format, desc->cropWidth, desc->cropHeight); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, 1); + e = d3d11_crop_copy(context, ctx->staging[0].resource, image, desc->cropWidth, desc->cropHeight, desc->imageWidth, bpp); if (e != S_OK) return e; // U - uint8_t *p = (uint8_t *) image + desc->imageWidth * desc->imageHeight; - e = d3d11_refresh_resource(&ctx->staging[1], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth / div, desc->cropHeight / div); + uint8_t *p = (uint8_t *) image + desc->imageWidth * desc->imageHeight * bpp; + e = d3d11_refresh_resource(&ctx->staging[1], device, format, desc->cropWidth / div, desc->cropHeight / div); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[1].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, 1); + e = d3d11_crop_copy(context, ctx->staging[1].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, bpp); if (e != S_OK) return e; // V - p += (desc->imageWidth / div) * (desc->imageHeight / div); - e = d3d11_refresh_resource(&ctx->staging[2], device, DXGI_FORMAT_R8_UNORM, desc->cropWidth / div, desc->cropHeight / div); + p += (desc->imageWidth / div) * (desc->imageHeight / div) * bpp; + e = d3d11_refresh_resource(&ctx->staging[2], device, format, desc->cropWidth / div, desc->cropHeight / div); if (e != S_OK) return e; - e = d3d11_crop_copy(context, ctx->staging[2].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, 1); + e = d3d11_crop_copy(context, ctx->staging[2].resource, p, desc->cropWidth / div, desc->cropHeight / div, desc->imageWidth / div, bpp); if (e != S_OK) return e; break; } @@ -421,7 +472,9 @@ bool mty_d3d11_render(struct 
gfx *gfx, MTY_Device *device, MTY_Context *context, cb.filter = desc->filter; cb.effect = desc->effect; cb.format = ctx->format; + cb.colorspace = desc->colorspace; cb.rotation = desc->rotation; + cb.full_range = desc->fullRange; D3D11_MAPPED_SUBRESOURCE res = {0}; e = ID3D11DeviceContext_Map(_context, ctx->psbres, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); diff --git a/src/windows/gfx/d3d12-ctx.c b/src/windows/gfx/d3d12-ctx.c index 2807a559c..a23fe46c7 100644 --- a/src/windows/gfx/d3d12-ctx.c +++ b/src/windows/gfx/d3d12-ctx.c @@ -559,3 +559,10 @@ bool mty_d3d12_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_d3d12_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + // XXX: Write this when we implement D3D12 support for HDR + + return false; +} diff --git a/src/windows/gfx/d3d12.c b/src/windows/gfx/d3d12.c index ba191aa87..2fdb0b4c8 100644 --- a/src/windows/gfx/d3d12.c +++ b/src/windows/gfx/d3d12.c @@ -31,7 +31,8 @@ struct d3d12_psvars { uint32_t effect; uint32_t format; uint32_t rotation; - uint32_t __pad[1]; // D3D11 needs 16-byte aligned, unsure about D3D12 + uint32_t full_range; // 1 = full range video, 0 = limited/partial range video + uint32_t __pad[3]; // D3D11 needs 16-byte aligned, unsure about D3D12 }; struct d3d12_res { diff --git a/src/windows/gfx/d3d9-ctx.c b/src/windows/gfx/d3d9-ctx.c index a7f97355d..81000fc61 100644 --- a/src/windows/gfx/d3d9-ctx.c +++ b/src/windows/gfx/d3d9-ctx.c @@ -319,3 +319,8 @@ bool mty_d3d9_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) { return false; } + +bool mty_d3d9_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git a/src/windows/gfx/gl-ctx.c b/src/windows/gfx/gl-ctx.c index af2a67525..6886d7176 100644 --- a/src/windows/gfx/gl-ctx.c +++ b/src/windows/gfx/gl-ctx.c @@ -199,3 +199,8 @@ bool mty_gl_ctx_make_current(struct gfx_ctx *gfx_ctx, bool current) return r; } + +bool mty_gl_ctx_hdr_supported(struct gfx_ctx *gfx_ctx) +{ + return false; +} diff --git 
a/src/windows/gfx/shaders/d3d11/hdr10.hlsl b/src/windows/gfx/shaders/d3d11/hdr10.hlsl new file mode 100644 index 000000000..6ba16dffb --- /dev/null +++ b/src/windows/gfx/shaders/d3d11/hdr10.hlsl @@ -0,0 +1,46 @@ +float spow(float x, float p) +{ + return sign(x) * pow(abs(x), p); +} + +float3 spow3(float3 v, float p) +{ + return float3(spow(v.x, p), spow(v.y, p), spow(v.z, p)); +} + +static const float PQ_m_1 = 2610.0f / 4096.0f / 4.0f; +static const float PQ_m_1_d = 1.0f / PQ_m_1; +static const float PQ_m_2 = 2523.0f / 4096.0f * 128.0f; +static const float PQ_m_2_d = 1.0f / PQ_m_2; +static const float PQ_c_1 = 3424.0f / 4096.0f; +static const float PQ_c_2 = 2413.0f / 4096.0f * 32.0f; +static const float PQ_c_3 = 2392.0f / 4096.0f * 32.0f; + +static const float HDR10_MAX_NITS = 10000.0f; + +float3 rec2020_pq_to_rec2020_linear(float3 color, float sdr_max_nits) +{ + // Apply the PQ EOTF (SMPTE ST 2084-2014) in order to linearize it + // Courtesy of https://github.com/colour-science/colour/blob/38782ac059e8ddd91939f3432bf06811c16667f0/colour/models/rgb/transfer_functions/st_2084.py#L126 + + float3 V_p = spow3(color, PQ_m_2_d); + + float3 n = max(0, V_p - PQ_c_1); + + float3 L = spow3(n / (PQ_c_2 - PQ_c_3 * V_p), PQ_m_1_d); + float3 C = L * HDR10_MAX_NITS / sdr_max_nits; + + return C; +} + +float3 rec2020_linear_to_rec2020_pq(float3 color, float sdr_max_nits) +{ + // Apply the inverse of the PQ EOTF (SMPTE ST 2084-2014) in order to encode the signal as PQ + // Courtesy of https://github.com/colour-science/colour/blob/38782ac059e8ddd91939f3432bf06811c16667f0/colour/models/rgb/transfer_functions/st_2084.py#L56 + + float3 Y_p = spow3(max(0.0f, (color / HDR10_MAX_NITS) * sdr_max_nits), PQ_m_1); + + float3 N = spow3((PQ_c_1 + PQ_c_2 * Y_p) / (PQ_c_3 * Y_p + 1.0f), PQ_m_2); + + return N; +} diff --git a/src/windows/gfx/shaders/d3d11/ps.ps4 b/src/windows/gfx/shaders/d3d11/ps.ps4 index e481b8ed8..bd0587854 100644 --- a/src/windows/gfx/shaders/d3d11/ps.ps4 +++ 
b/src/windows/gfx/shaders/d3d11/ps.ps4 @@ -4,6 +4,8 @@ // If a copy of the MIT License was not distributed with this file, // You can obtain one at https://spdx.org/licenses/MIT.html. +#include "hdr10.hlsl" + struct VS_OUTPUT { float4 position : SV_POSITION; float2 texcoord : TEXCOORD; @@ -16,8 +18,9 @@ cbuffer VS_CONSTANT_BUFFER : register(b0) { uint filter; uint effect; uint format; + uint colorspace; uint rotation; - uint __pad[1]; + bool ycbcr_full_range; }; SamplerState ss { @@ -42,6 +45,39 @@ inline float4 yuv_to_rgba(float y, float u, float v) return float4(r, g, b, 1.0); } +inline float4 yuv_to_rgba_rec2020(float y, float u, float v, bool full_range) +{ + // Using "RGB to YCbCr color conversion for UHDTV" (ITU-R BT.2020) + // Thanks to https://github.com/colour-science/colour/blob/c3735e5d0ad67443022ece0b42b575e040eb61d1/colour/models/rgb/ycbcr.py#L472 + + const float Y_min = full_range ? 0.0f : 0.06256109f; + const float Y_max = full_range ? 1.0f : 0.91886608f; + const float Y_minmax_dt = Y_max - Y_min; + const float C_min = full_range ? 0.0f : 0.06256109f; + const float C_max = full_range ? 
1.0f : 0.93841642f; + const float C_minmax_2 = (C_min + C_max) * 0.5f; + const float C_minmax_dt = C_max - C_min; + + y = y - Y_min; + u = u - C_minmax_2; + v = v - C_minmax_2; + + y = y / Y_minmax_dt; + u = u / C_minmax_dt; + v = v / C_minmax_dt; + + const float K_r = 0.2627f; + const float K_b = 0.0593f; + + const float r = y + (2.0f - 2.0f * K_r) * v; + const float b = y + (2.0f - 2.0f * K_b) * u; + const float g = (y - K_r * r - K_b * b) / (1.0f - K_r - K_b); + + return float4(r, g, b, 1.0); +} + +static const float SDR_MAX_NITS = 80.0f; + inline void gaussian(uint type, float w, float h, inout float2 uv) { float2 res = float2(w, h); @@ -95,29 +131,72 @@ float4 main(VS_OUTPUT input) : SV_TARGET if (filter == 3 || filter == 4) gaussian(filter, width, height, uv); - // NV12, NV16 - if (format == 2 || format == 5) { + // NV12, NV16, P016 + if (format == 2 || format == 5 || format == 10) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex1.Sample(ss, uv).g; - rgba = yuv_to_rgba(y, u, v); + if (format == 10 && colorspace == 3) { + rgba = yuv_to_rgba_rec2020(y, u, v, ycbcr_full_range); + + } else { + rgba = yuv_to_rgba(y, u, v); + } - // I420, I444 - } else if (format == 3 || format == 4) { + // I420, I444, I444_10, I444_16 + } else if (format == 3 || format == 4 || format == 11 || format == 12) { float y = tex0.Sample(ss, uv).r; float u = tex1.Sample(ss, uv).r; float v = tex2.Sample(ss, uv).r; - rgba = yuv_to_rgba(y, u, v); + // I444_10 data contain 10-bits per planar pixel but stored in 16-bit UNORM textures. + // DXGI will normalize these 10-bit values by dividing by 2^16 but this is wrong - we need them to be divided by 2^10 instead. + // We correct for this by renormalizing the values here. Note that 2^16 / 2^10 = 64. 
+ if (format == 11) { + y = y * 64.0f; + u = u * 64.0f; + v = v * 64.0f; + } + + if ((format == 11 || format == 12) && colorspace == 3) { + rgba = yuv_to_rgba_rec2020(y, u, v, ycbcr_full_range); - // AYUV - } else if (format == 8) { + } else { + rgba = yuv_to_rgba(y, u, v); + } + + // AYUV, Y410 + } else if (format == 8 || format == 9) { float y = tex0.Sample(ss, uv).r; float u = tex0.Sample(ss, uv).g; float v = tex0.Sample(ss, uv).b; - rgba = yuv_to_rgba(y, u, v); + if (format == 9 && colorspace == 3) { + // Note the reordering of yuv components here...RGBA maps to UYVA according to https://docs.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format + rgba = yuv_to_rgba_rec2020(u, y, v, ycbcr_full_range); + + } else { + rgba = yuv_to_rgba(y, u, v); + } + + // TODO: This section is a bit obsolete I think....re-evaluate if its needed + // RGB10A2, either sRGB or HDR10 + } else if (format == 13 && (colorspace == 1 || colorspace == 3)) { + if (colorspace == 1) { + // sRGB - do nothing different + rgba = tex0.Sample(ss, uv); + + } else { + // HDR10 + // TODO: Do we need to do a st 2084 encoding/decoding? + // TODO: Do we need to do any tonemapping? + rgba = tex0.Sample(ss, uv); + } + + // RGBA16F, scRGB linear + } else if (format == 14 && colorspace == 2) { + rgba = tex0.Sample(ss, uv); // BGRA } else { diff --git a/src/windows/gfx/shaders/d3d11/psui.ps4 b/src/windows/gfx/shaders/d3d11/psui.ps4 index aa69b1e6a..6ab41a6e4 100644 --- a/src/windows/gfx/shaders/d3d11/psui.ps4 +++ b/src/windows/gfx/shaders/d3d11/psui.ps4 @@ -4,16 +4,55 @@ // If a copy of the MIT License was not distributed with this file, // You can obtain one at https://spdx.org/licenses/MIT.html. 
+#include "hdr10.hlsl" + struct PS_INPUT { float4 pos : SV_POSITION; float4 col : COLOR0; float2 uv : TEXCOORD0; }; +cbuffer VS_CONSTANT_BUFFER : register(b0) { + uint hdr; + float hdr_brighten_factor; +}; + sampler sampler0; Texture2D texture0; +static const float SDR_MAX_NITS = 80.0f; // the reference sRGB luminance is 80 nits (aka the brightness of paper white) + +// Courtesy of https://github.com/obsproject/obs-studio/pull/6157/files#diff-81ee756f47c3a2fbb9f9fa0a858d79c4da89db97d8ae79fbd643c9533fba177b +static const float3x3 REC709_TO_REC2020 = +{ + {0.6274040f, 0.3292820f, 0.0433136f}, + {0.0690970f, 0.9195400f, 0.0113612f}, + {0.0163916f, 0.0880132f, 0.8955950f} +}; + +float3 srgb_to_linear(float3 color) +{ + // Fast approximation of sRGB's transfer function + return pow(abs(saturate(color)), 2.2f); +} + +float3 srgb_linear_to_rec2020_linear(float3 color) +{ + return mul(REC709_TO_REC2020, color); +} + float4 main(PS_INPUT input) : SV_Target { - return input.col * texture0.Sample(sampler0, input.uv); + float4 ui = input.col * texture0.Sample(sampler0, input.uv); + + if (hdr) { + float3 ui_rgb = ui.rgb; + ui_rgb = srgb_to_linear(ui_rgb); // UI texture is encoded non-linearly in sRGB, so we need to first linearize it + ui_rgb = srgb_linear_to_rec2020_linear(ui_rgb); + ui_rgb *= hdr_brighten_factor; // 1.0 in sRGB is 80 nits which is the reference SDR luminance but most SDR displays will actually render 1.0 at around 200-300 nits for improved viewing; we mimic this by brightening the UI texture by a configurable constant + ui_rgb = rec2020_linear_to_rec2020_pq(ui_rgb, SDR_MAX_NITS); + ui.rgb = ui_rgb; + } + + return ui; }