Copy link to clipboard
Copied
I'm using the Adobe PDF Services API to convert PDF files to DOCX format.
My setup (.NET 8, C#) works well, but I’m encountering an issue with scanned documents.
When I upload a scanned PDF, the service automatically applies OCR, generating text content rather than keeping the scanned pages as images in the DOCX file.
For my specific use case, I need the output DOCX file to preserve the scanned PDF pages as images without any text recognition or OCR processing. I couldn't find an option in ExportPDFParams or other related classes to disable OCR.
Is there a way to configure Adobe PDF Services to bypass OCR when converting scanned PDFs to DOCX?
/// <summary>
/// Converts the PDF at <paramref name="filename"/> to DOCX through Adobe PDF Services
/// and writes the result next to the input as <c>filename + ".converted.docx"</c>.
/// </summary>
/// <param name="filename">Path of the PDF file to upload and convert.</param>
/// <returns>
/// <c>true</c> when the converted document was written; <c>false</c> when the
/// ADOBE_SERVICES_CLIENT_ID / ADOBE_SERVICES_CLIENT_SECRET environment variables are
/// missing or when any step of the conversion throws (the error is logged, not rethrown).
/// </returns>
public bool TryConvertWithAdobe(string filename)
{
    string docXPath = filename + ".converted.docx";
    var clientId = Environment.GetEnvironmentVariable("ADOBE_SERVICES_CLIENT_ID");
    var secret = Environment.GetEnvironmentVariable("ADOBE_SERVICES_CLIENT_SECRET");
    if (clientId is null || secret is null)
    {
        Logger.Info("Cannot convert pdf to docx, no Adobe credentials found. Skip this step.");
        return false;
    }

    try
    {
        var credentials = new ServicePrincipalCredentials(clientId, secret);
        PDFServices pdfServices = new PDFServices(credentials);

        // Upload the source PDF as an asset.
        using Stream inputStream = File.OpenRead(filename);
        IAsset asset = pdfServices.Upload(inputStream, PDFServicesMediaType.PDF.GetMIMETypeValue());

        // Submit the export job and fetch its result.
        var exportPDFParams = ExportPDFParams.ExportPDFParamsBuilder(ExportPDFTargetFormat.DOCX)
            .Build();
        var exportPDFJob = new ExportPDFJob(asset, exportPDFParams);
        var location = pdfServices.Submit(exportPDFJob);
        PDFServicesResponse<ExportPDFResult> pdfServicesResponse =
            pdfServices.GetJobResult<ExportPDFResult>(location, typeof(ExportPDFResult));

        // Download the converted document.
        IAsset resultAsset = pdfServicesResponse.Result.Asset;
        StreamAsset streamAsset = pdfServices.GetContent(resultAsset);
        using Stream contentStream = streamAsset.Stream;

        // File.Create truncates an existing file; File.OpenWrite would leave stale
        // trailing bytes from a previous, longer output and corrupt the DOCX.
        // The using declaration releases the handle even when CopyTo throws.
        using Stream outputStream = File.Create(docXPath);
        contentStream.CopyTo(outputStream);
        return true;
    }
    catch (Exception ex)
    {
        Logger.Error("Cannot convert pdf to docx with Adobe services: " + ex.Message);
        return false;
    }
}
Thank you!
Have something to add?