Participant
July 12, 2024
Question
Configuration
- July 12, 2024
- 1 reply
- 282 views
I want to extract the content from a PDF (purchase invoices, which are image-based PDFs) and get a JSON response. Following the documentation, I tried this code, but I got this error/log message. Is this issue related to configuration?
Here is the complete code (Node.js SDK):
import * as fs from "fs";
import { pipeline } from "stream/promises";
import {
  ServicePrincipalCredentials,
  PDFServices,
  MimeType,
  ExtractPDFParams,
  ExtractElementType,
  ExtractPDFJob,
  ExtractPDFResult,
} from "@adobe/pdfservices-node-sdk";
import AdmZip from "adm-zip";
import dotenv from "dotenv";
// Load variables from a local .env file into process.env; the credentials
// used by AdobeExtractAPI are read from process.env.
dotenv.config();
// Debug aid, intentionally left disabled: enabling it would print the client
// secret to the console.
// console.log(process.env.PDF_SERVICES_CLIENT_SECRET);
/**
 * Small wrapper that runs a text extraction job through the Adobe PDF
 * Services Node SDK and inspects the resulting archive.
 */
class AdobeExtractAPI {
  /**
   * Uploads "./Adobe Extract API Sample.pdf", submits an extract job that
   * requests text elements only, saves the returned archive to
   * "./ExtractTextInfoFromPDF.zip", then logs the text of every element in
   * "structuredData.json" whose path ends with "/H1".
   *
   * Errors raised along the way are caught and logged, not rethrown.
   *
   * @returns {Promise<void>} Settles once the job has finished (or failed)
   *   and the input stream has been released.
   */
  async extractTextFromPDF() {
    // Declared outside `try` so that `finally` can see it. A `const` inside
    // the `try` block is out of scope in `finally`, which made the cleanup
    // itself throw a ReferenceError on every call.
    let readStream;
    try {
      // Initial setup, create credentials instance
      const credentials = new ServicePrincipalCredentials({
        clientId: process.env.PDF_SERVICES_CLIENT_ID,
        clientSecret: process.env.PDF_SERVICES_CLIENT_SECRET,
      });

      // Creates a PDF Services instance
      const pdfServices = new PDFServices({ credentials });

      // Creates an asset from the source file and uploads it
      readStream = fs.createReadStream("./Adobe Extract API Sample.pdf");
      const inputAsset = await pdfServices.upload({
        readStream,
        mimeType: MimeType.PDF,
      });

      // Create parameters for the job
      const params = new ExtractPDFParams({
        elementsToExtract: [ExtractElementType.TEXT],
      });

      // Creates a new job instance
      const job = new ExtractPDFJob({ inputAsset, params });

      // Submit the job and get the job result
      const pollingURL = await pdfServices.submit({ job });
      const pdfServicesResponse = await pdfServices.getJobResult({
        pollingURL,
        resultType: ExtractPDFResult,
      });

      // Get content from the resulting asset
      const resultAsset = pdfServicesResponse.result.resource;
      const streamAsset = await pdfServices.getContent({ asset: resultAsset });

      const outputFilePath = "./ExtractTextInfoFromPDF.zip";
      console.log(`Saving asset at ${outputFilePath}`);
      const writeStream = fs.createWriteStream(outputFilePath);

      // Wait until the archive is completely written before opening it.
      // A bare `.pipe()` returns immediately, so the zip reader would see an
      // empty or truncated file.
      await pipeline(streamAsset.readStream, writeStream);

      const zip = new AdmZip(outputFilePath);
      const jsonData = zip.readAsText("structuredData.json");
      const data = JSON.parse(jsonData);
      for (const element of data.elements) {
        if (element.Path.endsWith("/H1")) {
          console.log(element.Text);
        }
      }
    } catch (err) {
      console.log("Exception encountered while executing operation", err);
    } finally {
      // `readStream` is still undefined if the failure happened before the
      // file was opened.
      readStream?.destroy();
    }
  }
}
export default AdobeExtractAPI;
