From 61f932f453f91c2e6c03c94e90c76596701990b6 Mon Sep 17 00:00:00 2001 From: Gabe Stocco <98900+gfs@users.noreply.github.com> Date: Tue, 20 Dec 2022 12:53:04 -0800 Subject: [PATCH] Remove parallelized Rar Extractor The underlying dependency's "OpenEntryStream" API is not threadsafe as it implicitly calls the ctor for RarStream which directly manipulates the shared reference to the overall Stream for the archive file, resulting in corrupted files (albeit of the correct quantity) when using the parallel argument. --- RecursiveExtractor/Extractors/RarExtractor.cs | 91 ++++--------------- 1 file changed, 19 insertions(+), 72 deletions(-) diff --git a/RecursiveExtractor/Extractors/RarExtractor.cs b/RecursiveExtractor/Extractors/RarExtractor.cs index 15472456..7249ab6b 100644 --- a/RecursiveExtractor/Extractors/RarExtractor.cs +++ b/RecursiveExtractor/Extractors/RarExtractor.cs @@ -143,86 +143,33 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions opti if (rarArchive != null && fileEntry.EntryStatus == FileEntryStatus.Default) { var entries = rarArchive.Entries.Where(x => x.IsComplete && !x.IsDirectory); - if (options.Parallel) + foreach (var entry in entries) { - var files = new ConcurrentStack(); - - while (entries.Any()) + governor.CheckResourceGovernor(entry.Size); + FileEntry? 
newFileEntry = null; + try { - var batchSize = Math.Min(options.BatchSize, entries.Count()); - - var streams = entries.Take(batchSize).Select(entry => (entry, entry.OpenEntryStream())).ToList(); - - governor.CheckResourceGovernor(streams.Sum(x => x.Item2.Length)); - - streams.AsParallel().ForAll(streampair => - { - try - { - FileEntry newFileEntry = new FileEntry(streampair.entry.Key, streampair.Item2, fileEntry, false, streampair.entry.CreatedTime, streampair.entry.LastModifiedTime, streampair.entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff); - - if (newFileEntry != null) - { - if (options.Recurse || topLevel) - { - var entries = Context.Extract(newFileEntry, options, governor, false); - if (entries.Any()) - { - files.PushRange(entries.ToArray()); - } - } - else - { - files.Push(newFileEntry); - } - } - } - catch (Exception e) - { - Logger.Debug(Extractor.DEBUG_STRING, ArchiveFileType.RAR, fileEntry.FullPath, streampair.entry.Key, e.GetType()); - } - }); - governor.CheckResourceGovernor(0); - - entries = entries.Skip(streams.Count); - - while (files.TryPop(out var result)) - { - if (result != null) - yield return result; - } + var stream = entry.OpenEntryStream(); + var name = entry.Key.Replace('/', Path.DirectorySeparatorChar); + newFileEntry = new FileEntry(name, stream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff); } - } - else - { - foreach (var entry in entries) + catch (Exception e) { - governor.CheckResourceGovernor(entry.Size); - FileEntry? 
newFileEntry = null; - try - { - var stream = entry.OpenEntryStream(); - var name = entry.Key.Replace('/', Path.DirectorySeparatorChar); - newFileEntry = new FileEntry(name, stream, fileEntry, false, entry.CreatedTime, entry.LastModifiedTime, entry.LastAccessedTime, memoryStreamCutoff: options.MemoryStreamCutoff); - } - catch (Exception e) - { - Logger.Debug(Extractor.DEBUG_STRING, ArchiveFileType.RAR, fileEntry.FullPath, entry.Key, e.GetType()); - } - if (newFileEntry != null) + Logger.Debug(Extractor.DEBUG_STRING, ArchiveFileType.RAR, fileEntry.FullPath, entry.Key, e.GetType()); + } + if (newFileEntry != null) + { + if (options.Recurse || topLevel) { - if (options.Recurse || topLevel) - { - foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false)) - { - yield return innerEntry; - } - } - else + foreach (var innerEntry in Context.Extract(newFileEntry, options, governor, false)) { - yield return newFileEntry; + yield return innerEntry; } } + else + { + yield return newFileEntry; + } } } }