diff --git a/workspace/all/minarch/minarch.c b/workspace/all/minarch/minarch.c index 3e72ed89..86073bf9 100644 --- a/workspace/all/minarch/minarch.c +++ b/workspace/all/minarch/minarch.c @@ -121,26 +121,28 @@ static int screen_effect = EFFECT_NONE; // No scanlines or grid effects static int prevent_tearing = 1; // Enable vsync (lenient mode) /** - * Pixel Format Downsampling Flag + * Core Pixel Format * - * Most libretro cores natively output RGB565 (16-bit color), which matches our - * display hardware. However, some cores output XRGB8888 (32-bit color) and require - * real-time conversion to RGB565. + * Tracks the pixel format the current core outputs. Our display hardware uses RGB565 + * (16-bit color), so non-native formats require real-time conversion. * - * When downsample=1: - * - Core outputs XRGB8888 (4 bytes/pixel) - * - buffer_downsample() converts to RGB565 (2 bytes/pixel) - * - Conversion extracts top 5/6/5 bits per channel - * - Adds ~1-2ms overhead per frame (optimized single-pass) + * Supported formats (from libretro.h): + * - RETRO_PIXEL_FORMAT_0RGB1555 (0): Legacy 15-bit, 1 unused bit. Default if core + * doesn't call SET_PIXEL_FORMAT. Used by some older arcade cores (mame2003+). + * Conversion: Extract 5-bit R/G/B, expand G to 6 bits, pack to RGB565. * - * Cores requiring downsampling: - * - PlayStation (PCSX ReARMed) - 32-bit framebuffer - * - Neo Geo (FBNeo/geolith) - High color arcade graphics - * - Some modern cores that prefer higher color depth + * - RETRO_PIXEL_FORMAT_XRGB8888 (1): 32-bit with unused alpha byte. + * Used by PlayStation (PCSX ReARMed), Neo Geo (FBNeo), modern cores. + * Conversion: Extract top 5/6/5 bits per channel, pack to RGB565. * + * - RETRO_PIXEL_FORMAT_RGB565 (2): Native 16-bit format - no conversion needed. + * Most cores use this. Recommended format per libretro spec. + * + * Performance: NEON-optimized conversion adds ~0.3-0.5ms per frame. * Set automatically by RETRO_ENVIRONMENT_SET_PIXEL_FORMAT callback. 
*/ -static int downsample = 0; +static enum retro_pixel_format pixel_format = + RETRO_PIXEL_FORMAT_0RGB1555; // Default per libretro spec // Performance Settings static int show_debug = 0; // Display FPS/CPU usage overlay @@ -2650,16 +2652,21 @@ static bool environment_callback(unsigned cmd, void* data) { // copied from pico case RETRO_ENVIRONMENT_SET_PIXEL_FORMAT: { /* 10 */ const enum retro_pixel_format* format = (enum retro_pixel_format*)data; - LOG_info("Core requested pixel format: %d", *format); - - if (*format == RETRO_PIXEL_FORMAT_RGB565) { - LOG_info("Using native RGB565 format (no conversion needed)"); - downsample = 0; - } else if (*format == RETRO_PIXEL_FORMAT_XRGB8888) { - LOG_info("Using XRGB8888 format with conversion to RGB565"); - downsample = 1; - } else { - LOG_error("Unsupported pixel format %d (only RGB565 and XRGB8888 supported)", *format); + switch (*format) { + case RETRO_PIXEL_FORMAT_0RGB1555: + LOG_info("Core requested 0RGB1555 format (15-bit, conversion to RGB565)"); + pixel_format = RETRO_PIXEL_FORMAT_0RGB1555; + break; + case RETRO_PIXEL_FORMAT_XRGB8888: + LOG_info("Core requested XRGB8888 format (32-bit, conversion to RGB565)"); + pixel_format = RETRO_PIXEL_FORMAT_XRGB8888; + break; + case RETRO_PIXEL_FORMAT_RGB565: + LOG_info("Core requested RGB565 format (native, no conversion needed)"); + pixel_format = RETRO_PIXEL_FORMAT_RGB565; + break; + default: + LOG_error("Core requested unknown pixel format %d", *format); return false; } break; @@ -3345,69 +3352,68 @@ static int fit = 1; // Use software scaler (fit to screen) static int fit = 0; // Use hardware scaler #endif -// Buffer for pixel format conversion (XRGB8888 -> RGB565) -static void* buffer = NULL; +// Helper macro: true if pixel format requires conversion to RGB565 +#define NEEDS_CONVERSION ((pixel_format) != RETRO_PIXEL_FORMAT_RGB565) + +// Buffer for pixel format conversion (0RGB1555/XRGB8888 -> RGB565) +static void* convert_buffer = NULL; /** * Frees pixel format conversion 
buffer. */ -static void buffer_dealloc(void) { - if (!buffer) +static void convert_buffer_free(void) { + if (!convert_buffer) return; - free(buffer); - buffer = NULL; + free(convert_buffer); + convert_buffer = NULL; } /** - * Allocates pixel format conversion buffer. + * Allocates pixel format conversion buffer for RGB565 output. * * @param w Width in pixels * @param h Height in pixels - * @param p Pitch in bytes (unused but kept for consistency) */ -static void buffer_realloc(int w, int h, int p) { - buffer_dealloc(); +static void convert_buffer_alloc(int w, int h) { + convert_buffer_free(); size_t buffer_size = (w * FIXED_BPP) * h; - buffer = malloc(buffer_size); - if (!buffer) { - LOG_error("Failed to allocate downsample buffer: %dx%d (%zu bytes)", w, h, buffer_size); - LOG_error("Disabling downsampling to prevent crash"); - downsample = 0; + convert_buffer = malloc(buffer_size); + if (!convert_buffer) { + LOG_error("Failed to allocate conversion buffer: %dx%d (%zu bytes)", w, h, buffer_size); + LOG_error("Conversion disabled due to allocation failure"); + // NOTE: Do not change pixel_format here - core will continue outputting + // in the original format, so changing it would cause color corruption return; } - LOG_debug("Allocated downsample buffer: %dx%d (%zu bytes)", w, h, buffer_size); + LOG_debug("Allocated conversion buffer: %dx%d (%zu bytes)", w, h, buffer_size); } +// ============================================================================ +// Pixel Format Conversion Functions +// +// Convert non-native formats to RGB565 for display. NEON-optimized versions +// process multiple pixels at once (8 for 0RGB1555, 4 for XRGB8888) for ~3-4x +// speedup on ARM devices. +// ============================================================================ + +#ifdef HAS_NEON +#include <arm_neon.h> + /** - * Converts XRGB8888 pixel data to RGB565 format using ARM NEON SIMD. - * - * NEON-optimized version that processes 4 pixels in parallel using 128-bit - * vector registers. 
Achieves ~3-4x speedup vs scalar implementation. + * Converts XRGB8888 to RGB565 using ARM NEON SIMD. * - * Algorithm: - * 1. Load 4 XRGB8888 pixels (128 bits) into NEON quad register - * 2. Extract R, G, B channels using vector AND + shift operations - * 3. Combine into packed RGB565 format - * 4. Narrow from 32-bit to 16-bit and store 4 RGB565 pixels + * Processes 4 pixels per iteration using 128-bit vector operations. + * Input: XXRRGGBB XXRRGGBB XXRRGGBB XXRRGGBB (4x32-bit) + * Output: RRRRRGGGGGGBBBBB RRRRRGGGGGGBBBBB (4x16-bit packed) * - * Performance: Reduces conversion overhead from ~1-2ms to ~0.3-0.5ms per frame. - * - * @param data Source pixel data in XRGB8888 format - * @param width Frame width in pixels - * @param height Frame height in pixels - * @param pitch Bytes per scanline of source data - * - * @note Only available when HAS_NEON is defined - * @note Processes pixels in groups of 4, with scalar fallback for remainder - * @note Uses NEON intrinsics for ARM32/ARM64 portability + * @param data Source XRGB8888 data + * @param width Frame width + * @param height Frame height + * @param pitch Source pitch in bytes */ -#ifdef HAS_NEON -#include <arm_neon.h> - -static void buffer_downsample_neon(const void* data, unsigned width, unsigned height, - size_t pitch) { +static void convert_xrgb8888_neon(const void* data, unsigned width, unsigned height, size_t pitch) { const uint32_t* input = data; - uint16_t* output = buffer; + uint16_t* output = convert_buffer; size_t extra = pitch / sizeof(uint32_t) - width; // NEON mask constants for extracting RGB565 components from XRGB8888 @@ -3415,134 +3421,218 @@ static void buffer_downsample_neon(const void* data, unsigned width, unsigned he const uint32x4_t mask_green = vdupq_n_u32(0x0000FC00); // Green: bits 15-10 const uint32x4_t mask_red = vdupq_n_u32(0x00F80000); // Red: bits 23-19 - // Process scanlines for (unsigned y = 0; y < height; y++) { unsigned x = 0; const uint32_t* line_input = input; uint16_t* line_output = 
output; - // NEON vectorized loop: process 4 pixels (128 bits) at once - unsigned width_vec = width & ~3; // Round down to multiple of 4 + // NEON: process 4 pixels at a time + unsigned width_vec = width & ~3u; for (; x < width_vec; x += 4) { - // Load 4 XRGB8888 pixels (128 bits total) uint32x4_t pixels = vld1q_u32(line_input); line_input += 4; - // Extract color channels using NEON intrinsics - // Blue: (pixel & 0x000000F8) >> 3 uint32x4_t blue = vshrq_n_u32(vandq_u32(pixels, mask_blue), 3); - - // Green: (pixel & 0x0000FC00) >> 5 uint32x4_t green = vshrq_n_u32(vandq_u32(pixels, mask_green), 5); - - // Red: (pixel & 0x00F80000) >> 8 uint32x4_t red = vshrq_n_u32(vandq_u32(pixels, mask_red), 8); - // Combine channels: RGB565 = R | G | B uint32x4_t rgb565_32 = vorrq_u32(vorrq_u32(red, green), blue); - - // Narrow from 32-bit to 16-bit (4 pixels become 4 uint16_t values) uint16x4_t rgb565 = vmovn_u32(rgb565_32); - // Store 4 RGB565 pixels (64 bits) vst1_u16(line_output, rgb565); line_output += 4; } - // Scalar tail: process remaining pixels (< 4) + // Scalar tail for (; x < width; x++) { uint32_t pixel = *line_input++; *line_output++ = ((pixel & 0xF80000) >> 8) | ((pixel & 0x00FC00) >> 5) | ((pixel & 0x0000F8) >> 3); } - // Move to next scanline (account for pitch padding) input += width + extra; output += width; } } -#endif // HAS_NEON /** - * Converts XRGB8888 pixel data to RGB565 format. + * Converts 0RGB1555 to RGB565 using ARM NEON SIMD. * - * Some cores output 32-bit color (XRGB8888) but the device screen uses - * 16-bit color (RGB565). This function performs the conversion. + * Processes 8 pixels per iteration using 128-bit vector operations. 
+ * Input: 0RRRRRGGGGGBBBBB (5-5-5 with unused MSB) + * Output: RRRRRGGGGGGBBBBB (5-6-5) * - * @param data Source pixel data in XRGB8888 format - * @param width Frame width in pixels - * @param height Frame height in pixels - * @param pitch Bytes per scanline of source data + * The key difference from RGB565 is that green is only 5 bits in 1555, + * so we need to expand it to 6 bits. We duplicate the MSB of green + * into the LSB position: g6 = (g << 1) | (g >> 4) * - * @note Based on picoarch implementation - * @note Writes converted data to 'buffer' global - * @note Uses NEON optimization when HAS_NEON is defined (3-4x speedup) + * @param data Source 0RGB1555 data + * @param width Frame width + * @param height Frame height + * @param pitch Source pitch in bytes */ -static void buffer_downsample(const void* data, unsigned width, unsigned height, size_t pitch) { - // Validate buffer was allocated (buffer_realloc must be called first) - if (!buffer) { - LOG_error("Downsample buffer not allocated - skipping frame"); - return; +static void convert_0rgb1555_neon(const void* data, unsigned width, unsigned height, size_t pitch) { + const uint16_t* input = data; + uint16_t* output = convert_buffer; + size_t extra = pitch / sizeof(uint16_t) - width; + + for (unsigned y = 0; y < height; y++) { + unsigned x = 0; + const uint16_t* line_input = input; + uint16_t* line_output = output; + + // NEON: process 8 pixels at a time + unsigned width_vec = width & ~7u; + for (; x < width_vec; x += 8) { + uint16x8_t src = vld1q_u16(line_input); + line_input += 8; + + // Extract 5-bit components from 0RRRRRGGGGGBBBBB + // R: bits 14-10, G: bits 9-5, B: bits 4-0 + uint16x8_t r = vandq_u16(vshrq_n_u16(src, 10), vdupq_n_u16(0x1F)); + uint16x8_t g = vandq_u16(vshrq_n_u16(src, 5), vdupq_n_u16(0x1F)); + uint16x8_t b = vandq_u16(src, vdupq_n_u16(0x1F)); + + // Expand green from 5 to 6 bits: g6 = (g << 1) | (g >> 4) + // This duplicates the MSB into the new LSB for better color accuracy + 
uint16x8_t g6 = vorrq_u16(vshlq_n_u16(g, 1), vshrq_n_u16(g, 4)); + + // Pack to RGB565: RRRRRGGGGGGBBBBB + uint16x8_t rgb565 = vorrq_u16(vorrq_u16(vshlq_n_u16(r, 11), vshlq_n_u16(g6, 5)), b); + + vst1q_u16(line_output, rgb565); + line_output += 8; + } + + // Scalar tail + for (; x < width; x++) { + uint16_t px = *line_input++; + uint16_t r = (px >> 10) & 0x1F; + uint16_t g = (px >> 5) & 0x1F; + uint16_t b = px & 0x1F; + uint16_t g6 = (g << 1) | (g >> 4); + *line_output++ = (r << 11) | (g6 << 5) | b; + } + + input += width + extra; + output += width; } +} +#endif // HAS_NEON +/** + * Converts XRGB8888 to RGB565 (scalar fallback). + * + * @param data Source XRGB8888 data + * @param width Frame width + * @param height Frame height + * @param pitch Source pitch in bytes + */ +static void convert_xrgb8888_scalar(const void* data, unsigned width, unsigned height, + size_t pitch) { const uint32_t* input = data; - uint16_t* output = buffer; + uint16_t* output = convert_buffer; + size_t extra = pitch / sizeof(uint32_t) - width; - // Validate pitch is reasonable for XRGB8888 format - size_t min_pitch = width * sizeof(uint32_t); - if (pitch < min_pitch) { - LOG_error("Invalid pitch %zu for width %u (XRGB8888 requires >= %zu)", pitch, width, - min_pitch); - LOG_error("Core framebuffer is corrupt - skipping frame to prevent buffer overrun"); - return; // Abort conversion - reading more data than provided would crash + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x++) { + uint32_t pixel = *input++; + *output++ = ((pixel & 0xF80000) >> 8) | // Red: bits 23-19 -> 15-11 + ((pixel & 0x00FC00) >> 5) | // Green: bits 15-10 -> 10-5 + ((pixel & 0x0000F8) >> 3); // Blue: bits 7-3 -> 4-0 + } + input += extra; } +} - // Validate pitch is aligned to 4 bytes (sizeof(uint32_t)) - if (pitch % sizeof(uint32_t) != 0) { - LOG_error("Misaligned pitch %zu (not multiple of %zu) - skipping frame", pitch, - sizeof(uint32_t)); - LOG_error("Core framebuffer alignment 
violation"); - return; // Abort to prevent misaligned memory access +/** + * Converts 0RGB1555 to RGB565 (scalar fallback). + * + * @param data Source 0RGB1555 data + * @param width Frame width + * @param height Frame height + * @param pitch Source pitch in bytes + */ +static void convert_0rgb1555_scalar(const void* data, unsigned width, unsigned height, + size_t pitch) { + const uint16_t* input = data; + uint16_t* output = convert_buffer; + size_t extra = pitch / sizeof(uint16_t) - width; + + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x++) { + uint16_t px = *input++; + // Extract 5-bit components from 0RRRRRGGGGGBBBBB + uint16_t r = (px >> 10) & 0x1F; + uint16_t g = (px >> 5) & 0x1F; + uint16_t b = px & 0x1F; + // Expand green from 5 to 6 bits + uint16_t g6 = (g << 1) | (g >> 4); + // Pack to RGB565 + *output++ = (r << 11) | (g6 << 5) | b; + } + input += extra; } +} - // Calculate stride: number of pixels to skip after each scanline - // For XRGB8888 (4 bytes/pixel), extra padding per line is: - // extra = (pitch / 4) - width - // Example: For a 160-pixel-wide image with 640-byte pitch (160 × 4): - // extra = (640 / 4) - 160 = 0 pixels of padding per line - size_t extra = pitch / sizeof(uint32_t) - width; +/** + * Converts pixel data to RGB565 format based on current pixel_format setting. + * + * Dispatches to the appropriate conversion function (NEON-optimized or scalar) + * based on the source format. RGB565 input is a no-op (returns immediately). 
+ * + * @param data Source pixel data + * @param width Frame width in pixels + * @param height Frame height in pixels + * @param pitch Source pitch in bytes + * + * @note Writes converted data to convert_buffer global + * @note convert_buffer_alloc() must be called first + */ +static void pixel_convert(const void* data, unsigned width, unsigned height, size_t pitch) { + if (!convert_buffer) { + LOG_error("Conversion buffer not allocated - skipping frame"); + return; + } - LOG_debug("Downsampling %ux%u XRGB8888->RGB565: pitch=%zu bytes, stride=%zu pixels", width, - height, pitch, extra); + // Validate pitch based on pixel format + size_t bytes_per_pixel = (pixel_format == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2; + size_t min_pitch = width * bytes_per_pixel; - // Warn about unusually large pitch values (may indicate core issues) - if (extra > width * 2) { - LOG_warn( - "Very large pitch stride: %zu pixels padding for %u visible (core may have issues)", - extra, width); + if (pitch < min_pitch) { + LOG_error("Invalid pitch %zu for width %u (format %d requires >= %zu)", pitch, width, + pixel_format, min_pitch); + return; } + LOG_debug("Converting %ux%u from format %d to RGB565", width, height, pixel_format); + + switch (pixel_format) { + case RETRO_PIXEL_FORMAT_XRGB8888: #ifdef HAS_NEON - // Use NEON-optimized version when available (3-4x faster) - // NEON processes 4 pixels at a time using SIMD instructions - buffer_downsample_neon(data, width, height, pitch); + convert_xrgb8888_neon(data, width, height, pitch); #else - // Scalar fallback: Convert XRGB8888 to RGB565 pixel-by-pixel - for (unsigned y = 0; y < height; y++) { - for (unsigned x = 0; x < width; x++) { - // Optimized single-operation conversion: - // Extract R (bits 23-19), G (bits 15-10), B (bits 7-3) and pack into RGB565 - uint32_t pixel = *input; - *output = ((pixel & 0xF80000) >> 8) | // Red: 5 bits - ((pixel & 0x00FC00) >> 5) | // Green: 6 bits - ((pixel & 0x0000F8) >> 3); // Blue: 5 bits - input++; - 
output++; - } + convert_xrgb8888_scalar(data, width, height, pitch); +#endif + break; - input += extra; // Skip padding to next scanline - } + case RETRO_PIXEL_FORMAT_0RGB1555: +#ifdef HAS_NEON + convert_0rgb1555_neon(data, width, height, pitch); +#else + convert_0rgb1555_scalar(data, width, height, pitch); #endif + break; + + case RETRO_PIXEL_FORMAT_RGB565: + // Should never be called for RGB565, but handle it gracefully + LOG_warn("pixel_convert called for RGB565 (no conversion needed)"); + break; + + default: + LOG_error("Unknown pixel format %d", pixel_format); + break; + } } /** @@ -3659,8 +3749,8 @@ static void* apply_rotation(void* src, uint32_t src_w, uint32_t src_h, uint32_t * @note Clears screen when scaler changes */ static void selectScaler(int src_w, int src_h, int src_p) { - if (downsample) - buffer_realloc(src_w, src_h, src_p); + if (NEEDS_CONVERSION) + convert_buffer_alloc(src_w, src_h); // ROTATION: Swap dimensions for 90°/270° rotations BEFORE scaling calculations // Note: core.aspect_ratio is already for the ROTATED dimensions, so don't invert it @@ -3939,15 +4029,15 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign fps_ticks += 1; // Calculate pitches for different stages - // pitch = bytes per line as provided by core (XRGB8888 or RGB565) + // pitch = bytes per line as provided by core (varies by pixel format) // rgb565_pitch = bytes per line in RGB565 format (what renderer expects) size_t rgb565_pitch; - if (downsample) { - // Core provided XRGB8888 (4 bytes/pixel), we'll convert to RGB565 (2 bytes/pixel) + if (NEEDS_CONVERSION) { + // Core uses non-native format, we'll convert to RGB565 (2 bytes/pixel) rgb565_pitch = width * FIXED_BPP; - LOG_debug("XRGB8888->RGB565: %ux%u, pitch %zu->%zu bytes", width, height, pitch, - rgb565_pitch); + LOG_debug("Format %d->RGB565: %ux%u, pitch %zu->%zu bytes", pixel_format, width, height, + pitch, rgb565_pitch); } else { // Core provided RGB565 directly, use as-is 
rgb565_pitch = pitch; @@ -3979,16 +4069,9 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign void* frame_data; size_t frame_pitch; - if (downsample) { - // Validate pitch before attempting conversion - size_t min_pitch = width * sizeof(uint32_t); - if (pitch < min_pitch) { - LOG_error("Skipping frame due to invalid pitch: %zu < %zu", pitch, min_pitch); - return; // Abort entire frame to prevent rendering corrupted data - } - - buffer_downsample(data, width, height, pitch); - frame_data = buffer; + if (NEEDS_CONVERSION) { + pixel_convert(data, width, height, pitch); + frame_data = convert_buffer; frame_pitch = rgb565_pitch; } else { frame_data = (void*)data; @@ -4006,7 +4089,7 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign renderer.src = rotated_data; - // debug - render after downsample so we write to RGB565 buffer + // debug - render after pixel conversion so we write to RGB565 buffer if (show_debug) { int x = 2 + renderer.src_x; int y = 2 + renderer.src_y; @@ -4075,14 +4158,14 @@ static void video_refresh_callback_main(const void* data, unsigned width, unsign * - Non-threaded: Calls video_refresh_callback_main directly * - Threaded: Copies/converts frame to backbuffer and signals main thread * - * @param data Pointer to pixel data (XRGB8888 or RGB565 depending on downsample) + * @param data Pointer to pixel data (format depends on pixel_format setting) * @param width Frame width in pixels * @param height Frame height in pixels * @param pitch Bytes per scanline * * @note This is a libretro callback, invoked by core after rendering a frame * @note Threading mode copies frame to prevent race conditions - * @note When downsampling, performs XRGB8888->RGB565 conversion here + * @note When using non-RGB565 format, performs pixel conversion here */ static void video_refresh_callback(const void* data, unsigned width, unsigned height, size_t pitch) { @@ -4093,9 +4176,9 @@ static void 
video_refresh_callback(const void* data, unsigned width, unsigned he pthread_mutex_lock(&core_mx); // Determine backbuffer pitch: - // - Downsampling: Output is tightly packed (width * 2 bytes/line) + // - Non-RGB565: Output is tightly packed after conversion (width * 2 bytes/line) // - RGB565: Preserve core's pitch (may have padding) - size_t backbuffer_pitch = downsample ? (width * FIXED_BPP) : pitch; + size_t backbuffer_pitch = NEEDS_CONVERSION ? (width * FIXED_BPP) : pitch; // Reallocate backbuffer if dimensions changed if (backbuffer && (backbuffer->w != (int)width || backbuffer->h != (int)height || @@ -4121,24 +4204,19 @@ static void video_refresh_callback(const void* data, unsigned width, unsigned he } // Copy or convert data to backbuffer - if (downsample) { - // Validate pitch before attempting conversion - size_t min_pitch = width * sizeof(uint32_t); - if (pitch < min_pitch) { - LOG_error("Skipping threaded frame due to invalid pitch: %zu < %zu", pitch, - min_pitch); - pthread_mutex_unlock(&core_mx); - return; // Abort frame to prevent buffer corruption - } - - // Core provided XRGB8888, convert to tightly-packed RGB565 - buffer_downsample(data, width, height, pitch); - if (!buffer) { - LOG_error("Failed to allocate downsample buffer: %ux%u", width, height); + if (NEEDS_CONVERSION) { + // Ensure conversion buffer is allocated + if (!convert_buffer) + convert_buffer_alloc(width, height); + + // Convert to RGB565 + pixel_convert(data, width, height, pitch); + if (!convert_buffer) { + LOG_error("Failed to allocate conversion buffer: %ux%u", width, height); pthread_mutex_unlock(&core_mx); return; } - memcpy(backbuffer->pixels, buffer, height * backbuffer_pitch); + memcpy(backbuffer->pixels, convert_buffer, height * backbuffer_pitch); } else { // Core provided RGB565, direct copy with original pitch memcpy(backbuffer->pixels, data, height * backbuffer_pitch); @@ -4367,9 +4445,15 @@ void Core_quit(void) { } } void Core_close(void) { + // Free pixel format 
conversion buffer + convert_buffer_free(); + // Free rotation buffer rotation_buffer_free(); + // Reset pixel format to default for next core + pixel_format = RETRO_PIXEL_FORMAT_0RGB1555; + if (core.handle) dlclose(core.handle); } @@ -6500,7 +6584,7 @@ int main(int argc, char* argv[]) { PAD_quit(); GFX_quit(); - buffer_dealloc(); + convert_buffer_free(); return EXIT_SUCCESS; }