用Ghostscript API将PDF格式转换为图像格式(C#)

简介: 原文: 用Ghostscript API将PDF格式转换为图像格式(C#) 由于项目需要在.net下将pdf转换为普通图像格式,在网上搜了好久终于找到一个解决方案,于是采用拿来主义直接用。
原文: 用Ghostscript API将PDF格式转换为图像格式(C#)

由于项目需要在.net下将pdf转换为普通图像格式,在网上搜了好久终于找到一个解决方案,于是采用拿来主义直接用。来源见代码中注释,感谢原作者。

 

 

 

using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

/*
Convert PDF to Image Format(JPEG) using Ghostscript API

convert a pdf to jpeg using ghostscript command line:
gswin32c -q -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dFirstPage=1 -dAlignToPixels=0 -dGridFitTT=0 -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r100x100 -sOutputFile=output.jpg test.pdf

see also: http://www.mattephraim.com/blog/2009/01/06/a-simple-c-wrapper-for-ghostscript/
and: http://www.codeproject.com/KB/cs/GhostScriptUseWithCSharp.aspx

Note: copy gsdll32.dll to system32 directory before using this ghostscript wrapper.
*/
namespace ConvertPDF
{
    /// <summary>
    /// Class to convert a pdf to an image using GhostScript DLL.
    /// Credit for this code go to: Rangel Avulso
    /// (http://www.hrangel.com.br/index.php/2006/12/04/converter-pdf-para-imagem-jpeg-em-c/);
    /// the version here only fixes a few bugs and refactors a little.
    /// </summary>
    class PDFConvert
    {
        #region GhostScript Import

        /// <summary>
        /// Create a new instance of Ghostscript. This instance is passed to most other gsapi functions.
        /// The caller_handle will be provided to callback functions.
        /// At this stage, Ghostscript supports only one instance.
        /// </summary>
        /// <param name="pinstance">Receives the new Ghostscript instance handle.</param>
        /// <param name="caller_handle">Opaque handle handed back to callback functions.</param>
        /// <returns>The Ghostscript return code; negative values are treated as failure by <see cref="Convert"/>.</returns>
        [DllImport("gsdll32.dll", EntryPoint = "gsapi_new_instance")]
        private static extern int gsapi_new_instance(out IntPtr pinstance, IntPtr caller_handle);

        /// <summary>This is the important function that will perform the conversion.</summary>
        /// <param name="instance">Instance handle obtained from gsapi_new_instance.</param>
        /// <param name="argc">Number of entries in <paramref name="argv"/>.</param>
        /// <param name="argv">Pointer to an array of pointers to null terminated argument strings.</param>
        /// <returns>The Ghostscript return code; negative values are logged as failure by <see cref="Convert"/>.</returns>
        [DllImport("gsdll32.dll", EntryPoint = "gsapi_init_with_args")]
        private static extern int gsapi_init_with_args(IntPtr instance, int argc, IntPtr argv);

        /// <summary>
        /// Exit the interpreter. This must be called on shutdown if gsapi_init_with_args() has been called,
        /// and just before gsapi_delete_instance().
        /// </summary>
        /// <param name="instance">Instance handle obtained from gsapi_new_instance.</param>
        /// <returns>The Ghostscript return code.</returns>
        [DllImport("gsdll32.dll", EntryPoint = "gsapi_exit")]
        private static extern int gsapi_exit(IntPtr instance);

        /// <summary>
        /// Destroy an instance of Ghostscript. Before you call this, Ghostscript must have finished.
        /// If Ghostscript has been initialised, you must call gsapi_exit before gsapi_delete_instance.
        /// </summary>
        /// <param name="instance">Instance handle obtained from gsapi_new_instance.</param>
        [DllImport("gsdll32.dll", EntryPoint = "gsapi_delete_instance")]
        private static extern void gsapi_delete_instance(IntPtr instance);

        #endregion

        #region Variables

        private string _sDeviceFormat;
        private int _iWidth;
        private int _iHeight;
        private int _iResolutionX;
        private int _iResolutionY;
        private int _iJPEGQuality;
        private Boolean _bFitPage;
        private IntPtr _objHandle;

        #endregion

        #region Properties

        /// <summary>Ghostscript output device name (passed as -sDEVICE=...). Overwritten by every call to <see cref="Convert"/>.</summary>
        public string OutputFormat
        {
            get { return _sDeviceFormat; }
            set { _sDeviceFormat = value; }
        }

        /// <summary>Output width; emitted as part of -g{Width}x{Height} when both Width and Height are positive.</summary>
        public int Width
        {
            get { return _iWidth; }
            set { _iWidth = value; }
        }

        /// <summary>Output height; emitted as part of -g{Width}x{Height} when both Width and Height are positive.</summary>
        public int Height
        {
            get { return _iHeight; }
            set { _iHeight = value; }
        }

        /// <summary>Horizontal resolution for -r. Overwritten by the <c>width</c> argument of every call to <see cref="Convert"/>.</summary>
        public int ResolutionX
        {
            get { return _iResolutionX; }
            set { _iResolutionX = value; }
        }

        /// <summary>Vertical resolution for -r. Overwritten by the <c>height</c> argument of every call to <see cref="Convert"/>.</summary>
        public int ResolutionY
        {
            get { return _iResolutionY; }
            set { _iResolutionY = value; }
        }

        /// <summary>When true, -dPDFFitPage is added to the Ghostscript arguments.</summary>
        public Boolean FitPage
        {
            get { return _bFitPage; }
            set { _bFitPage = value; }
        }

        /// <summary>Quality of compression of JPG (1-100); other values leave -dJPEGQ unset.</summary>
        public int JPEGQuality
        {
            get { return _iJPEGQuality; }
            set { _iJPEGQuality = value; }
        }

        #endregion

        #region Init

        /// <summary>Creates a converter whose handle is passed to Ghostscript as the caller handle.</summary>
        /// <param name="objHandle">Caller handle forwarded to gsapi_new_instance.</param>
        public PDFConvert(IntPtr objHandle)
        {
            _objHandle = objHandle;
        }

        /// <summary>Creates a converter with a null (IntPtr.Zero) caller handle.</summary>
        public PDFConvert()
        {
            _objHandle = IntPtr.Zero;
        }

        #endregion

        /// <summary>
        /// Convert a string to a null terminated byte array for Ghostscript.
        /// The caller pins the result with GCHandle.Alloc(..., GCHandleType.Pinned) and passes
        /// GCHandle.AddrOfPinnedObject() to the native API.
        /// </summary>
        /// <param name="str">The argument text to encode.</param>
        /// <returns>The encoded bytes followed by a single 0 terminator.</returns>
        private static byte[] StringToAnsiZ(string str)
        {
            // Encode through the default code page instead of truncating each UTF-16 char to its
            // low byte; truncation corrupts any character above U+00FF (e.g. CJK file names).
            // NOTE(review): assumes Encoding.Default is the system ANSI code page (true on
            // .NET Framework) and that Ghostscript reads arguments in that encoding - confirm
            // if this is ever run on a runtime where Encoding.Default is UTF-8.
            byte[] encoded = Encoding.Default.GetBytes(str);

            // A freshly allocated array is zero-filled, so the extra trailing byte is the terminator.
            byte[] terminated = new byte[encoded.Length + 1];
            Array.Copy(encoded, terminated, encoded.Length);
            return terminated;
        }

        /// <summary>Convert the file!</summary>
        /// <param name="inputFile">Path of the PDF to read. If it does not exist a message box is shown and nothing is converted.</param>
        /// <param name="outputFile">Value passed to Ghostscript as -sOutputFile.</param>
        /// <param name="firstPage">Value passed as -dFirstPage.</param>
        /// <param name="lastPage">Value passed as -dLastPage.</param>
        /// <param name="deviceFormat">Ghostscript device name passed as -sDEVICE (e.g. "jpeg").</param>
        /// <param name="width">Despite the name, stored as the horizontal resolution (-r); see <see cref="GetGeneratedArgs"/>.</param>
        /// <param name="height">Despite the name, stored as the vertical resolution (-r); see <see cref="GetGeneratedArgs"/>.</param>
        public void Convert(string inputFile, string outputFile, int firstPage, int lastPage, string deviceFormat, int width, int height)
        {
            // Avoid to work when the file doesn't exist
            if (!System.IO.File.Exists(inputFile))
            {
                System.Windows.Forms.MessageBox.Show(string.Format("The file :'{0}' doesn't exist", inputFile));
                return;
            }

            string[] sArgs = GetGeneratedArgs(inputFile, outputFile, firstPage, lastPage, deviceFormat, width, height);
            int argCount = sArgs.Length;

            GCHandle[] argHandles = new GCHandle[argCount];
            IntPtr[] argPointers = new IntPtr[argCount];
            GCHandle argvHandle = new GCHandle();
            IntPtr gsInstance = IntPtr.Zero;
            bool initCalled = false;

            try
            {
                // Pin one null terminated byte array per argument and remember its address.
                for (int i = 0; i < argCount; i++)
                {
                    argHandles[i] = GCHandle.Alloc(StringToAnsiZ(sArgs[i]), GCHandleType.Pinned);
                    argPointers[i] = argHandles[i].AddrOfPinnedObject();
                }

                // Pin the pointer array itself; its address is the native argv.
                argvHandle = GCHandle.Alloc(argPointers, GCHandleType.Pinned);

                int returnCode = gsapi_new_instance(out gsInstance, _objHandle);
                if (returnCode < 0)
                {
                    // No usable instance: do not hand an invalid handle to init/exit/delete.
                    System.Diagnostics.Trace.TraceError("gsapi_new_instance failed with code " + returnCode);
                    gsInstance = IntPtr.Zero;
                    return;
                }

                // gsapi_exit must be called once gsapi_init_with_args has been called, even on failure.
                initCalled = true;
                try
                {
                    returnCode = gsapi_init_with_args(gsInstance, argCount, argvHandle.AddrOfPinnedObject());
                    if (returnCode < 0)
                    {
                        System.Diagnostics.Trace.TraceError("gsapi_init_with_args failed with code " + returnCode);
                    }
                }
                catch (Exception ex)
                {
                    // Conversion stays best-effort for callers (as before), but the failure is
                    // now recorded instead of being dropped silently.
                    System.Diagnostics.Trace.TraceError("gsapi_init_with_args threw: " + ex.Message);
                }
            }
            finally
            {
                // Shut Ghostscript down before unpinning the buffers it was given.
                if (gsInstance != IntPtr.Zero)
                {
                    if (initCalled)
                    {
                        gsapi_exit(gsInstance);
                    }
                    gsapi_delete_instance(gsInstance);
                }

                if (argvHandle.IsAllocated)
                {
                    argvHandle.Free();
                }

                // Free every pinned argument. The loop is bounded by the argument count; the
                // original bounded it by the Ghostscript return code, which leaked all handles
                // on success (return code 0).
                for (int i = 0; i < argCount; i++)
                {
                    if (argHandles[i].IsAllocated)
                    {
                        argHandles[i].Free();
                    }
                }
            }
        }

        /// <summary>
        /// Builds the Ghostscript argument vector for one conversion and records the device
        /// format and resolution on this instance.
        /// </summary>
        /// <remarks>
        /// <paramref name="width"/> and <paramref name="height"/> are stored in ResolutionX /
        /// ResolutionY and emitted as -r; the pixel size (-g) comes from the Width / Height
        /// properties. This naming mismatch is kept because existing callers rely on it.
        /// </remarks>
        /// <returns>
        /// 15 fixed switches, then the optional switches (-dJPEGQ, -g, -dPDFFitPage, -r in that
        /// order), then -sOutputFile and finally the input file.
        /// </returns>
        private string[] GetGeneratedArgs(string inputFile, string outputFile, int firstPage, int lastPage, string deviceFormat, int width, int height)
        {
            _sDeviceFormat = deviceFormat;
            _iResolutionX = width;
            _iResolutionY = height;

            List<string> args = new List<string>();

            args.Add("pdf2img");                        // this parameter have little real use
            args.Add("-dNOPAUSE");                      // I don't want interruptions
            args.Add("-dBATCH");                        // stop after
            args.Add("-dPARANOIDSAFER");                // run this command in safe mode
            args.Add("-sDEVICE=" + _sDeviceFormat);     // what kind of export format i should provide
            args.Add("-q");                             // keep gs from writing information to standard output
            args.Add("-dQUIET");
            args.Add("-dNOPROMPT");                     // disable prompts for user interaction
            args.Add("-dMaxBitmap=500000000");          // set high for better performance
            args.Add(String.Format("-dFirstPage={0}", firstPage));
            args.Add(String.Format("-dLastPage={0}", lastPage));
            args.Add("-dAlignToPixels=0");              // output anti-aliasing configuration
            args.Add("-dGridFitTT=0");
            args.Add("-dTextAlphaBits=4");
            args.Add("-dGraphicsAlphaBits=4");
            // For a complete list watch here:
            // http://pages.cs.wisc.edu/~ghost/doc/cvs/Devices.htm

            // The Ghostscript device is named "jpeg"; the original only tested for "jpg", so the
            // quality switch was never emitted for the device callers actually pass.
            bool isJpegDevice = _sDeviceFormat == "jpeg" || _sDeviceFormat == "jpg";
            if (isJpegDevice && _iJPEGQuality > 0 && _iJPEGQuality < 101)
            {
                args.Add("-dJPEGQ=" + _iJPEGQuality);
            }

            if (_iWidth > 0 && _iHeight > 0)
            {
                args.Add("-g" + _iWidth + "x" + _iHeight);
            }

            if (_bFitPage)
            {
                args.Add("-dPDFFitPage");
            }

            if (_iResolutionX > 0)
            {
                if (_iResolutionY > 0)
                {
                    args.Add("-r" + _iResolutionX + "x" + _iResolutionY);
                }
                else
                {
                    args.Add("-r" + _iResolutionX);
                }
            }

            // Output file and input file always come last.
            args.Add(string.Format("-sOutputFile={0}", outputFile));
            args.Add(string.Format("{0}", inputFile));

            return args.ToArray();
        }

        /// <summary>Sample call: renders page 1 of a fixed test PDF to a fixed JPEG path at 100x100 resolution.</summary>
        public void pdf2jpgTest()
        {
            this.Convert(@"C://tmp//pdfimg//test1.pdf", @"C://tmp//pdfimg//out.jpg", 1, 1, "jpeg", 100, 100);
            //this.Convert(@"C://tmp//pdfimg//test.pdf", @"C://tmp//pdfimg//out2.jpg", 291, 291, "jpeg", 800, 800);
        }
    }
}

 

 

测试WinForm:

可以采用下面的方式测试调用上面的功能,如:

 // Create a converter with the parameterless constructor (no caller handle).
 PDFConvert convertor = new PDFConvert();
 // Run the built-in sample conversion defined on PDFConvert.
 convertor.pdf2jpgTest();

 

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using ConvertPDF;

namespace PDF2Img
{
    /// <summary>
    /// Test form: clicking the button runs the sample PDF conversion and paints the
    /// resulting image onto the form.
    /// </summary>
    public partial class Form1 : Form
    {
        // Last converted image, painted in OnPaint; null until the first successful click.
        private Bitmap myBitmap;

        // Bounds of myBitmap as reported by Bitmap.GetBounds; used as the destination rectangle.
        private RectangleF BMPContainer;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Runs the sample conversion, loads the produced JPEG into memory and requests a repaint.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            PDFConvert convertor = new PDFConvert();
            convertor.pdf2jpgTest();

            // Copy the decoded image into an independent Bitmap and dispose the file-backed
            // Image right away: Image.FromFile keeps the file locked until the Image is
            // disposed, which would stop the next conversion from overwriting out.jpg.
            Bitmap loaded;
            using (Image img = Image.FromFile(@"C://tmp//pdfimg//out.jpg"))
            {
                loaded = new Bitmap(img);
            }

            // Release the bitmap from the previous click before replacing it.
            if (myBitmap != null)
            {
                myBitmap.Dispose();
            }
            myBitmap = loaded;

            // The Graphics object is only needed to obtain the page unit; dispose it so the
            // underlying GDI handle is not leaked on every click.
            using (Graphics g = this.CreateGraphics())
            {
                GraphicsUnit unit = g.PageUnit;
                BMPContainer = myBitmap.GetBounds(ref unit); // X,Y = 0
            }

            this.Invalidate();
        }

        /// <summary>Draws the last converted image (if any), then lets the base class paint.</summary>
        protected override void OnPaint(PaintEventArgs e)
        {
            Graphics G = e.Graphics;
            if (myBitmap != null)
            {
                G.DrawImage(myBitmap, BMPContainer);
            }
            base.OnPaint(e);
        }
    }
}

目录
相关文章
|
27天前
|
机器学习/深度学习 人工智能 文字识别
Zerox:AI驱动的万能OCR工具,精准识别复杂布局并输出Markdown格式,支持PDF、DOCX、图片等多种文件格式
Zerox 是一款开源的本地化高精度OCR工具,基于GPT-4o-mini模型,支持PDF、DOCX、图片等多种格式文件,能够零样本识别复杂布局文档,输出Markdown格式结果。
108 4
Zerox:AI驱动的万能OCR工具,精准识别复杂布局并输出Markdown格式,支持PDF、DOCX、图片等多种文件格式
|
1月前
|
JSON 前端开发 搜索推荐
关于商品详情 API 接口 JSON 格式返回数据解析的示例
本文介绍商品详情API接口返回的JSON数据解析。最外层为`product`对象,包含商品基本信息(如id、name、price)、分类信息(category)、图片(images)、属性(attributes)、用户评价(reviews)、库存(stock)和卖家信息(seller)。每个字段详细描述了商品的不同方面,帮助开发者准确提取和展示数据。具体结构和字段含义需结合实际业务需求和API文档理解。
|
1月前
|
人工智能 文字识别 自然语言处理
Vision Parse:开源的 PDF 转 Markdown 工具,结合视觉语言模型和 OCR,识别文本和表格并保持原格式
Vision Parse 是一款开源的 PDF 转 Markdown 工具,基于视觉语言模型,能够智能识别和提取 PDF 中的文本和表格,并保持原有格式和结构。
161 19
Vision Parse:开源的 PDF 转 Markdown 工具,结合视觉语言模型和 OCR,识别文本和表格并保持原格式
|
2月前
|
人工智能 JSON Linux
利用阿里云GPU加速服务器实现pdf转换为markdown格式
随着AI模型的发展,GPU需求日益增长,尤其是个人学习和研究。直接购置硬件成本高且更新快,建议选择阿里云等提供的GPU加速型服务器。
利用阿里云GPU加速服务器实现pdf转换为markdown格式
|
2月前
|
人工智能 文字识别 数据挖掘
MarkItDown:微软开源的多格式转Markdown工具,支持将PDF、Word、图像和音频等文件转换为Markdown格式
MarkItDown 是微软开源的多功能文档转换工具,支持将 PDF、PPT、Word、Excel、图像、音频等多种格式的文件转换为 Markdown 格式,具备 OCR 文字识别、语音转文字和元数据提取等功能。
327 9
MarkItDown:微软开源的多格式转Markdown工具,支持将PDF、Word、图像和音频等文件转换为Markdown格式
|
2月前
|
JSON API 数据格式
获取商品详情API的请求格式是什么
获取商品详情API的请求格式通常依赖于特定的电商平台或服务提供商,但一般遵循类似的结构。以下是一个概括性的说明,以及针对几个主流电商平台的示例:
|
3月前
|
JSON API 数据安全/隐私保护
拍立淘按图搜索API接口返回数据的JSON格式示例
拍立淘按图搜索API接口允许用户通过上传图片来搜索相似的商品,该接口返回的通常是一个JSON格式的响应,其中包含了与上传图片相似的商品信息。以下是一个基于淘宝平台的拍立淘按图搜索API接口返回数据的JSON格式示例,同时提供对其关键字段的解释
|
4月前
|
JSON API 数据格式
Python| 如何使用 DALL·E 和 OpenAI API 生成图像(2)
Python| 如何使用 DALL·E 和 OpenAI API 生成图像(2)
100 0
Python| 如何使用 DALL·E 和 OpenAI API 生成图像(2)
|
4月前
|
JSON 数据格式
LangChain-20 Document Loader 文件加载 加载MD DOCX EXCEL PPT PDF HTML JSON 等多种文件格式 后续可通过FAISS向量化 增强检索
LangChain-20 Document Loader 文件加载 加载MD DOCX EXCEL PPT PDF HTML JSON 等多种文件格式 后续可通过FAISS向量化 增强检索
291 2
|
4月前
|
JSON API 数据格式
商品详情数据JSON格式示例参考(api接口)
JSON数据格式的商品详情数据通常包含商品的多个层级信息,以下是一个综合多个来源信息的JSON数据格式的商品详情数据示例参考:

热门文章

最新文章