Question
How to convert PDF to HTML using C#?
I want to convert a PDF to HTML using C#. Adobe Acrobat Pro is installed on my computer. I have written the following code, which runs without any errors, but I can't see output.html in the folder. What could be the reason? Is there an alternative way of achieving this?
using Acrobat;
using System;
namespace PDFToHTMLWithAdobeAcrobatPro
{
    /// <summary>
    /// Console program that asks Adobe Acrobat, through its COM automation
    /// objects, to save a PDF document as HTML.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Opens the input PDF with Acrobat, invokes the document's JavaScript
        /// <c>saveAs</c> method with the HTML conversion ID, and reports whether
        /// an output file was actually written.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: <c>args[0]</c> is the input PDF path and
        /// <c>args[1]</c> is the output HTML path. When omitted, the built-in
        /// default paths are used.
        /// </param>
        static void Main(string[] args)
        {
            string inputFilePath = (args != null && args.Length > 0) ? args[0] : @"C:\tmp\Everest.pdf";
            string outputFilePath = (args != null && args.Length > 1) ? args[1] : @"C:\tmp\output.html";

            // Acrobat COM objects
            AcroApp acroApp = null;
            CAcroPDDoc pdfDoc = null;
            object jsObj = null;

            try
            {
                // Initialize Acrobat application
                acroApp = CreateFromProgId("AcroExch.App") as AcroApp;
                pdfDoc = CreateFromProgId("AcroExch.PDDoc") as CAcroPDDoc;
                if (acroApp == null || pdfDoc == null)
                {
                    Console.WriteLine("Failed to initialize Acrobat COM objects.");
                    return;
                }

                // Open the PDF
                if (!pdfDoc.Open(inputFilePath))
                {
                    Console.WriteLine("Failed to open the PDF file.");
                    return;
                }

                // Execute JavaScript for HTML conversion
                jsObj = pdfDoc.GetJSObject();
                if (jsObj == null)
                {
                    Console.WriteLine("Failed to retrieve the JavaScript object.");
                    return;
                }

                // Remember the timestamp of any pre-existing output so that a stale
                // file left by an earlier run is not mistaken for a fresh result.
                DateTime? previousWriteUtc = null;
                if (System.IO.File.Exists(outputFilePath))
                {
                    previousWriteUtc = System.IO.File.GetLastWriteTimeUtc(outputFilePath);
                }

                // NOTE(review): the accepted conversion ID may depend on the installed
                // Acrobat version -- confirm "com.adobe.acrobat.html" against the
                // Acrobat JavaScript API reference for Doc.saveAs.
                jsObj.GetType().InvokeMember(
                    "saveAs",
                    System.Reflection.BindingFlags.InvokeMethod,
                    null,
                    jsObj,
                    new object[] { outputFilePath, "com.adobe.acrobat.html" }
                );

                // Only claim success when a new file is really on disk; the call
                // returning without an exception does not prove anything was written.
                if (WasOutputWritten(outputFilePath, previousWriteUtc))
                {
                    Console.WriteLine($"PDF successfully converted to HTML at {outputFilePath}");
                }
                else
                {
                    Console.WriteLine($"Conversion returned without error, but no new output file was found at {outputFilePath}");
                }
            }
            catch (System.Reflection.TargetInvocationException ex)
            {
                // InvokeMember wraps the real failure; show the inner message when present.
                Exception cause = ex.InnerException ?? ex;
                Console.WriteLine($"An error occurred: {cause.Message}");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"An error occurred: {ex.Message}");
            }
            finally
            {
                // Nested try/finally so that a failure while closing the document
                // cannot prevent Acrobat from being asked to exit.
                ReleaseCom(jsObj);
                try
                {
                    // Close the PDF document
                    if (pdfDoc != null)
                    {
                        pdfDoc.Close();
                    }
                }
                finally
                {
                    ReleaseCom(pdfDoc);
                    try
                    {
                        // Exit the Acrobat application
                        if (acroApp != null)
                        {
                            acroApp.Exit();
                        }
                    }
                    finally
                    {
                        ReleaseCom(acroApp);
                    }
                }
            }
        }

        /// <summary>
        /// Creates an instance of the COM class registered under <paramref name="progId"/>,
        /// or returns null when no such ProgID is registered.
        /// </summary>
        private static object CreateFromProgId(string progId)
        {
            Type comType = Type.GetTypeFromProgID(progId);
            if (comType == null)
            {
                return null;
            }

            return Activator.CreateInstance(comType);
        }

        /// <summary>
        /// Returns true when <paramref name="path"/> exists and, if it already existed
        /// before the conversion, its last-write time differs from <paramref name="previousWriteUtc"/>.
        /// </summary>
        private static bool WasOutputWritten(string path, DateTime? previousWriteUtc)
        {
            if (!System.IO.File.Exists(path))
            {
                return false;
            }

            return previousWriteUtc == null
                || System.IO.File.GetLastWriteTimeUtc(path) != previousWriteUtc.Value;
        }

        /// <summary>
        /// Releases the runtime callable wrapper for <paramref name="comObject"/> when it is a COM object.
        /// </summary>
        private static void ReleaseCom(object comObject)
        {
            if (comObject != null && System.Runtime.InteropServices.Marshal.IsComObject(comObject))
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
            }
        }
    }
}