PaddleOCREngine.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. // Copyright (c) 2021 raoyutian Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. using System.Collections.Generic;
  15. using System.Drawing;
  16. using System.Runtime.InteropServices;
  17. using System;
  18. using System.Linq;
  19. using System.Drawing.Imaging;
  20. using System.IO;
  21. using System.Reflection;
  22. namespace PaddleOCRSharp
  23. {
  24. /// <summary>
  25. /// PaddleOCR识别引擎对象
  26. /// </summary>
  27. public class PaddleOCREngine : EngineBase
  28. {
  29. #region PaddleOCR API
  30. [DllImport(PaddleOCRdllName, CallingConvention = CallingConvention.StdCall, SetLastError = true)]
  31. internal static extern void Initialize(string det_infer, string cls_infer, string rec_infer, string keys, OCRParameter parameter);
  32. [DllImport(PaddleOCRdllName, CallingConvention = CallingConvention.StdCall, SetLastError = true)]
  33. internal static extern void Initializejson(string det_infer, string cls_infer, string rec_infer, string keys, string parameterjson);
  34. [DllImport(PaddleOCRdllName, CallingConvention = CallingConvention.StdCall, SetLastError = true)]
  35. internal static extern IntPtr Detect(string imagefile);
  36. [DllImport(PaddleOCRdllName, CallingConvention = CallingConvention.StdCall, SetLastError = true)]
  37. internal static extern IntPtr DetectByte(byte[] imagebytedata, long size);
  38. [DllImport(PaddleOCRdllName, CallingConvention = CallingConvention.StdCall, SetLastError = true)]
  39. internal static extern IntPtr DetectBase64(string imagebase64);
  40. [DllImport(PaddleOCRdllName, CallingConvention = CallingConvention.StdCall, SetLastError = true)]
  41. internal static extern int FreeEngine();
  42. #endregion
  43. #region 文本识别
  44. /// <summary>
  45. /// PaddleOCR识别引擎对象初始化
  46. /// </summary>
  47. /// <param name="config">模型配置对象,如果为空则按默认值</param>
  48. /// <param name="parameter">识别参数,为空均按缺省值</param>
  49. public PaddleOCREngine(OCRModelConfig config, OCRParameter parameter = null) : base()
  50. {
  51. #if NET35
  52. #else
  53. if (!Environment.Is64BitProcess) throw new NotSupportedException($"PaddleOCRSharp只支持64位进程。");
  54. #endif
  55. //0:不支持,1:AVX,2:AVX2
  56. if (IsCPUSupport() <= 0) throw new NotSupportedException($"当前CPU指令集不支持PaddleOCR。The CPU instruction set is not surpport PaddleOCR");
  57. if (parameter == null) parameter = new OCRParameter();
  58. if (config == null)
  59. {
  60. string root= GetRootDirectory();
  61. config = new OCRModelConfig();
  62. string modelPathroot = root + @"\inference";
  63. config.det_infer = modelPathroot + @"\ch_PP-OCRv3_det_infer";
  64. config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
  65. config.rec_infer = modelPathroot + @"\ch_PP-OCRv3_rec_infer";
  66. config.keys = modelPathroot + @"\ppocr_keys.txt";
  67. }
  68. if (!Directory.Exists(config.det_infer)) throw new DirectoryNotFoundException(config.det_infer);
  69. if (!Directory.Exists(config.cls_infer)) throw new DirectoryNotFoundException(config.cls_infer);
  70. if (!Directory.Exists(config.rec_infer)) throw new DirectoryNotFoundException(config.rec_infer);
  71. if (!File.Exists(config.keys)) throw new FileNotFoundException(config.keys);
  72. Initialize(config.det_infer, config.cls_infer, config.rec_infer, config.keys, parameter);
  73. }
  74. /// <summary>
  75. /// PaddleOCR识别引擎对象初始化
  76. /// </summary>
  77. /// <param name="config">模型配置对象,如果为空则按默认值</param>
  78. /// <param name="parameterjson">识别参数json字符串</param>
  79. public PaddleOCREngine(OCRModelConfig config, string parameterjson):base()
  80. {
  81. #if NET35
  82. #else
  83. if (!Environment.Is64BitProcess) throw new NotSupportedException($"PaddleOCRSharp只支持64位进程。");
  84. #endif
  85. //0:不支持,1:AVX,2:AVX2
  86. if (IsCPUSupport() <= 0) throw new NotSupportedException($"当前CPU指令集不支持PaddleOCR。The CPU instruction set is not surpport PaddleOCR");
  87. if (config == null)
  88. {
  89. string root = GetRootDirectory();
  90. config = new OCRModelConfig();
  91. string modelPathroot = root + @"\inference";
  92. config.det_infer = modelPathroot + @"\ch_PP-OCRv3_det_infer";
  93. config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
  94. config.rec_infer = modelPathroot + @"\ch_PP-OCRv3_rec_infer";
  95. config.keys = modelPathroot + @"\ppocr_keys.txt";
  96. }
  97. if ( string.IsNullOrEmpty(parameterjson))
  98. {
  99. parameterjson = GetRootDirectory();
  100. parameterjson += @"\inference\PaddleOCR.config.json";
  101. if(!File.Exists(parameterjson)) throw new FileNotFoundException(parameterjson);
  102. parameterjson = File.ReadAllText(parameterjson);
  103. }
  104. if (!Directory.Exists(config.det_infer)) throw new DirectoryNotFoundException(config.det_infer);
  105. if (!Directory.Exists(config.cls_infer)) throw new DirectoryNotFoundException(config.cls_infer);
  106. if (!Directory.Exists(config.rec_infer)) throw new DirectoryNotFoundException(config.rec_infer);
  107. if (!File.Exists(config.keys)) throw new FileNotFoundException(config.keys);
  108. Initializejson(config.det_infer, config.cls_infer, config.rec_infer, config.keys, parameterjson);
  109. }
  110. /// <summary>
  111. /// 对图像文件进行文本识别
  112. /// </summary>
  113. /// <param name="imagefile">图像文件</param>
  114. /// <returns>OCR识别结果</returns>
  115. public OCRResult DetectText(string imagefile)
  116. {
  117. if (!File.Exists(imagefile)) throw new Exception($"文件{imagefile}不存在");
  118. var imagebyte = File.ReadAllBytes(imagefile);
  119. var result= DetectText(imagebyte);
  120. imagebyte = null;
  121. return result;
  122. }
  123. /// <summary>
  124. ///对图像对象进行文本识别
  125. /// </summary>
  126. /// <param name="image">图像</param>
  127. /// <returns>OCR识别结果</returns>
  128. public OCRResult DetectText(Image image)
  129. {
  130. if (image == null) throw new ArgumentNullException("image");
  131. var imagebyte = ImageToBytes(image);
  132. var result = DetectText(imagebyte);
  133. imagebyte = null;
  134. return result;
  135. }
  136. /// <summary>
  137. ///文本识别
  138. /// </summary>
  139. /// <param name="imagebyte">图像内存流</param>
  140. /// <returns>OCR识别结果</returns>
  141. public OCRResult DetectText(byte[] imagebyte)
  142. {
  143. if (imagebyte == null) throw new ArgumentNullException("imagebyte");
  144. var ptrResult = DetectByte(imagebyte, imagebyte.LongLength);
  145. return ConvertResult(ptrResult);
  146. }
  147. /// <summary>
  148. ///文本识别
  149. /// </summary>
  150. /// <param name="imagebase64">图像base64</param>
  151. /// <returns>OCR识别结果</returns>
  152. public OCRResult DetectTextBase64(string imagebase64)
  153. {
  154. if (imagebase64 == null || imagebase64 == "") throw new ArgumentNullException("imagebase64");
  155. IntPtr ptrResult = DetectBase64(imagebase64);
  156. return ConvertResult(ptrResult);
  157. }
  158. /// <summary>
  159. /// 结果解析
  160. /// </summary>
  161. /// <param name="ptrResult"></param>
  162. /// <returns></returns>
  163. private OCRResult ConvertResult(IntPtr ptrResult)
  164. {
  165. OCRResult result = new OCRResult();
  166. try
  167. {
  168. string json = Marshal.PtrToStringUni(ptrResult);
  169. List<TextBlock> textBlocks = JsonHelper.DeserializeObject<List<TextBlock>>(json);
  170. result.JsonText = json;
  171. result.TextBlocks = textBlocks;
  172. Marshal.FreeHGlobal(ptrResult);
  173. }
  174. catch (Exception ex)
  175. {
  176. throw new Exception("OCR结果Json反序列化失败。", ex);
  177. }
  178. return result;
  179. }
  180. #endregion
  181. #region 表格识别
  182. /// <summary>
  183. ///结构化文本识别
  184. /// </summary>
  185. /// <param name="image">图像</param>
  186. /// <returns>表格识别结果</returns>
  187. public OCRStructureResult DetectStructure(Image image)
  188. {
  189. if (image == null) throw new ArgumentNullException("image");
  190. var imagebyte = ImageToBytes(image);
  191. OCRResult result= DetectText(imagebyte);
  192. List<TextBlock> blocks = result.TextBlocks;
  193. if (blocks == null || blocks.Count == 0) return new OCRStructureResult();
  194. var listys = getzeroindexs(blocks.OrderBy(x => x.BoxPoints[0].Y).Select(x => x.BoxPoints[0].Y).ToArray(), 10);
  195. var listxs = getzeroindexs(blocks.OrderBy(x => x.BoxPoints[0].X).Select(x => x.BoxPoints[0].X).ToArray(), 10);
  196. int rowcount = listys.Count;
  197. int colcount = listxs.Count;
  198. OCRStructureResult structureResult = new OCRStructureResult();
  199. structureResult.TextBlocks = blocks;
  200. structureResult.RowCount = rowcount;
  201. structureResult.ColCount = colcount;
  202. structureResult.Cells = new List<StructureCells>();
  203. for (int i = 0; i < rowcount; i++)
  204. {
  205. int y_min = blocks.OrderBy(x => x.BoxPoints[0].Y).OrderBy(x => x.BoxPoints[0].Y).ToList()[listys[i]].BoxPoints[0].Y;
  206. int y_max = 99999;
  207. if (i < rowcount - 1)
  208. {
  209. y_max = blocks.OrderBy(x => x.BoxPoints[0].Y).ToList()[listys[i + 1]].BoxPoints[0].Y;
  210. }
  211. for (int j = 0; j < colcount; j++)
  212. {
  213. int x_min = blocks.OrderBy(x => x.BoxPoints[0].X).ToList()[listxs[j]].BoxPoints[0].X;
  214. int x_max = 99999;
  215. if (j < colcount - 1)
  216. {
  217. x_max = blocks.OrderBy(x => x.BoxPoints[0].X).ToList()[listxs[j + 1]].BoxPoints[0].X;
  218. }
  219. var textBlocks = blocks.Where(x => x.BoxPoints[0].X < x_max && x.BoxPoints[0].X >= x_min && x.BoxPoints[0].Y < y_max && x.BoxPoints[0].Y >= y_min).OrderBy(u => u.BoxPoints[0].X);
  220. var texts = textBlocks.Select(x => x.Text).ToArray();
  221. StructureCells cell = new StructureCells();
  222. cell.Row = i;
  223. cell.Col = j;
  224. #if NET35
  225. cell.Text = string.Join("", texts);
  226. #else
  227. cell.Text = string.Join<string>("", texts);
  228. #endif
  229. cell.TextBlocks = textBlocks.ToList();
  230. structureResult.Cells.Add(cell);
  231. }
  232. }
  233. return structureResult;
  234. }
  235. /// <summary>
  236. /// 计算表格分割
  237. /// </summary>
  238. /// <param name="pixellist"></param>
  239. /// <param name="thresholdtozero"></param>
  240. /// <returns></returns>
  241. private List<int> getzeroindexs(int[] pixellist, int thresholdtozero = 10)
  242. {
  243. List<int> zerolist = new List<int>();
  244. zerolist.Add(0);
  245. for (int i = 0; i < pixellist.Length; i++)
  246. {
  247. if ((i < pixellist.Length - 1)
  248. && (Math.Abs(pixellist[i + 1] - pixellist[i])) > thresholdtozero)
  249. {
  250. //突增点
  251. zerolist.Add(i + 1);
  252. }
  253. }
  254. return zerolist;
  255. }
  256. #endregion
  257. #region Dispose
  258. /// <summary>
  259. /// 释放对象
  260. /// </summary>
  261. public override void Dispose()
  262. {
  263. FreeEngine();
  264. }
  265. #endregion
  266. }
  267. }