diff --git a/TelegramSearchBot.OCR/OCRBootstrap.cs b/TelegramSearchBot.OCR/OCRBootstrap.cs
new file mode 100644
index 00000000..f3e8d9f6
--- /dev/null
+++ b/TelegramSearchBot.OCR/OCRBootstrap.cs
@@ -0,0 +1,78 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Net;
+using System.Text;
+using System.Threading.Tasks;
+using StackExchange.Redis;
+using TelegramSearchBot.OCR;
+
+namespace TelegramSearchBot.OCR {
+    /// <summary>
+    /// Entry point of the OCR worker. It pops task ids from the Redis list <c>OCRTasks</c>, runs
+    /// OCR on the base64 image stored under <c>OCRPost-{id}</c> and writes the recognized text to
+    /// <c>OCRResult-{id}</c>.
+    /// </summary>
+    public class OCRBootstrap {
+        /// <summary>How long after startup the worker keeps polling an empty queue.</summary>
+        private static readonly TimeSpan MinimumLifetime = TimeSpan.FromMinutes(10);
+
+        /// <summary>Pause between two polls while the queue is empty.</summary>
+        private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(1);
+
+        /// <summary>
+        /// Runs the worker loop. The loop ends once the queue is empty and at least ten minutes
+        /// have passed since startup.
+        /// </summary>
+        /// <param name="args">Process arguments; <c>args[1]</c> is the port of the Redis server on localhost.</param>
+        /// <exception cref="ArgumentException"><paramref name="args"/> has fewer than two elements.</exception>
+        public static void Startup(string[] args) {
+            if (args is null || args.Length < 2) {
+                throw new ArgumentException("Expected the Redis port as the second argument.", nameof(args));
+            }
+
+            // Dispose the multiplexer when the loop ends so the connection is closed explicitly.
+            using ConnectionMultiplexer redis = ConnectionMultiplexer.Connect($"localhost:{args[1]}");
+            IDatabase db = redis.GetDatabase();
+            var ocr = new PaddleOCR();
+            var startedAt = DateTime.UtcNow;
+
+            while (true) {
+                // Pop directly instead of checking the length first: a null reply is the only
+                // reliable "queue is empty" signal, and it saves one round trip per task.
+                RedisValue popped = db.ListLeftPop("OCRTasks");
+                if (popped.IsNull) {
+                    if (DateTime.UtcNow - startedAt >= MinimumLifetime) {
+                        break;
+                    }
+                    System.Threading.Thread.Sleep(PollInterval);
+                    continue;
+                }
+
+                var taskId = popped.ToString();
+                var text = "";
+                try {
+                    RedisValue photoBase64 = db.StringGetDelete($"OCRPost-{taskId}");
+                    if (!photoBase64.IsNull) {
+                        text = Recognize(ocr, photoBase64.ToString());
+                    }
+                } catch (Exception ex) {
+                    // A single bad payload must not stop the worker or leave the result key
+                    // unset; report the failure and answer with an empty result instead.
+                    Console.Error.WriteLine($"OCR task {taskId} failed: {ex}");
+                }
+                db.StringSet($"OCRResult-{taskId}", text);
+            }
+        }
+
+        /// <summary>Runs OCR on one base64-encoded image and joins the recognized texts with spaces.</summary>
+        /// <returns>The joined text, or an empty string when the reported status is not <c>0</c>.</returns>
+        private static string Recognize(PaddleOCR ocr, string photoBase64) {
+            var response = ocr.Execute(new List<string>() { photoBase64 });
+            if (!int.TryParse(response.Status, out var status) || status != 0) {
+                return "";
+            }
+            return string.Join(" ", response.Results.SelectMany(page => page).Select(item => item.Text));
+        }
+    }
+}
\ No newline at end of file
diff --git a/TelegramSearchBot.OCR/PaddleOCR.cs b/TelegramSearchBot.OCR/PaddleOCR.cs
new file mode 100644
index 00000000..d22e62cc
--- /dev/null
+++ b/TelegramSearchBot.OCR/PaddleOCR.cs
@@ -0,0 +1,95 @@
+using System;
+using System.Collections.Generic;
+using System.Drawing;
+using System.Drawing.Imaging;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using OpenCvSharp;
+using Sdcb.PaddleInference;
+using Sdcb.PaddleOCR;
+using Sdcb.PaddleOCR.Models;
+using Sdcb.PaddleOCR.Models.Local;
+using TelegramSearchBot.Common.Model;
+using TelegramSearchBot.Common.Model.DO;
+
+namespace TelegramSearchBot.OCR {
+    /// <summary>
+    /// Runs OCR on base64-encoded images through a <see cref="PaddleOcrAll"/> pipeline built from
+    /// <c>LocalFullModels.ChineseV3</c>.
+    /// </summary>
+    public class PaddleOCR {
+        /// <summary>The pipeline every recognition call goes through.</summary>
+        public PaddleOcrAll all { get; set; }
+
+        // Static, so it is shared by all instances: ExecuteAsync runs one OCR batch at a time.
+        private static readonly SemaphoreSlim semaphore = new SemaphoreSlim(1);
+
+        /// <summary>Builds the pipeline from the local Chinese V3 model with <c>PaddleDevice.Mkldnn()</c>.</summary>
+        public PaddleOCR() {
+            FullOcrModel model = LocalFullModels.ChineseV3;
+
+            all = new PaddleOcrAll(model,
+                PaddleDevice.Mkldnn()
+                ) {
+                AllowRotateDetection = true, /* allow detecting text that is at an angle */
+                Enable180Classification = false, /* allow recognizing text rotated by more than 90 degrees */
+            };
+        }
+
+        /// <summary>Decodes an encoded image as a color image and runs the pipeline on it.</summary>
+        /// <param name="image">Encoded image bytes, as accepted by <c>Cv2.ImDecode</c>.</param>
+        /// <returns>The raw pipeline result.</returns>
+        public PaddleOcrResult GetOcrResult(byte[] image) {
+            using Mat src = Cv2.ImDecode(image, ImreadModes.Color);
+            return all.Run(src);
+        }
+
+        /// <summary>Maps every recognized region to a <see cref="Result"/>.</summary>
+        /// <param name="paddleOcrResult">Pipeline output to convert.</param>
+        /// <returns>One entry per region with its text, its corner points as integer <c>[x, y]</c> pairs and its confidence.</returns>
+        public List<Result> ConvertToResults(PaddleOcrResult paddleOcrResult) {
+            return paddleOcrResult.Regions
+                .Select(region => new Result {
+                    Text = region.Text,
+                    TextRegion = region.Rect.Points()
+                        .Select(point => new List<int>() { ( int ) point.X, ( int ) point.Y })
+                        .ToList(),
+                    // A NaN score is reported as a confidence of zero.
+                    Confidence = float.IsNaN(region.Score) ? 0 : region.Score,
+                })
+                .ToList();
+        }
+
+        /// <summary>Runs OCR on every image in order and wraps the results.</summary>
+        /// <param name="images">Base64-encoded images.</param>
+        /// <returns>A result with one list per image, <c>Status</c> "0" and an empty <c>Message</c>.</returns>
+        /// <exception cref="FormatException">An entry of <paramref name="images"/> is not valid base64.</exception>
+        public PaddleOCRResult Execute(List<string> images) {
+            var results = images
+                .Select(Convert.FromBase64String)
+                .Select(GetOcrResult)
+                .Select(ConvertToResults)
+                .ToList();
+            return new PaddleOCRResult() {
+                Results = results,
+                Status = "0",
+                Message = "",
+            };
+        }
+
+        /// <summary>Runs <see cref="Execute"/> on a thread-pool thread, one call at a time across all instances.</summary>
+        /// <param name="images">Base64-encoded images.</param>
+        /// <returns>The result of <see cref="Execute"/>.</returns>
+        public async Task<PaddleOCRResult> ExecuteAsync(List<string> images) {
+            await semaphore.WaitAsync().ConfigureAwait(false);
+            try {
+                return await Task.Run(() => Execute(images)).ConfigureAwait(false);
+            } finally {
+                // Release even when Execute throws; otherwise every later call would wait forever.
+                semaphore.Release();
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/TelegramSearchBot.OCR/TelegramSearchBot.OCR.csproj b/TelegramSearchBot.OCR/TelegramSearchBot.OCR.csproj new file mode 100644 index 00000000..2f93923c --- /dev/null +++ b/TelegramSearchBot.OCR/TelegramSearchBot.OCR.csproj @@ -0,0 +1,28 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/TelegramSearchBot.sln b/TelegramSearchBot.sln index 0bfa3823..b560d5a7 100644 --- a/TelegramSearchBot.sln +++ b/TelegramSearchBot.sln @@ -25,6 +25,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TelegramSearchBot.Search", EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TelegramSearchBot.Search.Test", "TelegramSearchBot.Search.Test\TelegramSearchBot.Search.Test.csproj", "{A17FCB3D-FF05-46CD-A60E-6E43470A5AB3}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TelegramSearchBot.OCR", "TelegramSearchBot.OCR\TelegramSearchBot.OCR.csproj", "{B8C4D5E6-F7A8-4B9C-8D1E-2F3A4B5C6D7E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -51,6 +53,10 @@ Global {A17FCB3D-FF05-46CD-A60E-6E43470A5AB3}.Debug|Any CPU.Build.0 = Debug|Any CPU {A17FCB3D-FF05-46CD-A60E-6E43470A5AB3}.Release|Any CPU.ActiveCfg = Release|Any CPU {A17FCB3D-FF05-46CD-A60E-6E43470A5AB3}.Release|Any CPU.Build.0 = Release|Any CPU + {B8C4D5E6-F7A8-4B9C-8D1E-2F3A4B5C6D7E}.Debug|Any CPU.ActiveCfg = Debug|Any 
CPU + {B8C4D5E6-F7A8-4B9C-8D1E-2F3A4B5C6D7E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B8C4D5E6-F7A8-4B9C-8D1E-2F3A4B5C6D7E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B8C4D5E6-F7A8-4B9C-8D1E-2F3A4B5C6D7E}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/TelegramSearchBot/Service/Abstract/SubProcessService.cs b/TelegramSearchBot/Service/Abstract/SubProcessService.cs index 52e6fd8d..1ebf6f12 100644 --- a/TelegramSearchBot/Service/Abstract/SubProcessService.cs +++ b/TelegramSearchBot/Service/Abstract/SubProcessService.cs @@ -25,4 +25,4 @@ public async Task RunRpc(string payload) { return await db.StringWaitGetDeleteAsync($"{ForkName}Result-{guid}"); } } -} +} \ No newline at end of file diff --git a/TelegramSearchBot/TelegramSearchBot.csproj b/TelegramSearchBot/TelegramSearchBot.csproj index 24d9df77..6609052f 100644 --- a/TelegramSearchBot/TelegramSearchBot.csproj +++ b/TelegramSearchBot/TelegramSearchBot.csproj @@ -97,6 +97,7 @@ +