Skip to main content
Participant
July 12, 2024
Question

Configurati

  • July 12, 2024
  • 1 reply
  • 282 views

I want to extract the content from a PDF (purchase invoices, which are image-based PDFs) and get a JSON response. As per the documentation, I tried this code, but I got this error or log message. Is this issue related to configuration?

Here is the complete code (Node SDK):

import {
  ServicePrincipalCredentials,
  PDFServices,
  MimeType,
  ExtractPDFParams,
  ExtractElementType,
  ExtractPDFJob,
  ExtractPDFResult,
} from "@adobe/pdfservices-node-sdk";
import * as fs from "fs";
import AdmZip from "adm-zip";
import  dotenv from "dotenv";

// Initialise dotenv before any credentials are read; the class below takes
// PDF_SERVICES_CLIENT_ID and PDF_SERVICES_CLIENT_SECRET from process.env.
dotenv.config();
// Debug aid, intentionally left disabled so the client secret is never logged:
// console.log(process.env.PDF_SERVICES_CLIENT_SECRET);

/**
 * Thin wrapper around the Adobe PDF Services "Extract PDF" operation.
 *
 * Credentials are read from the PDF_SERVICES_CLIENT_ID and
 * PDF_SERVICES_CLIENT_SECRET environment variables.
 */
class AdobeExtractAPI {
  /**
   * Uploads "./Adobe Extract API Sample.pdf", runs a text-extraction job,
   * saves the resulting archive to "./ExtractTextInfoFromPDF.zip", and logs
   * the text of every element in "structuredData.json" whose path ends
   * with "/H1".
   *
   * Any failure is caught and logged; the method does not rethrow.
   *
   * @returns {Promise<void>} Resolves once the operation has finished
   *   (successfully or not) and the input stream has been released.
   */
  async extractTextFromPDF() {
    // Declared outside the try block so the finally clause can release it.
    // A `const` inside `try` is out of scope in `finally` and would make the
    // cleanup itself throw a ReferenceError.
    let readStream;
    try {
      // Initial setup, create credentials instance
      const credentials = new ServicePrincipalCredentials({
        clientId: process.env.PDF_SERVICES_CLIENT_ID,
        clientSecret: process.env.PDF_SERVICES_CLIENT_SECRET,
      });

      // Creates a PDF Services instance
      const pdfServices = new PDFServices({ credentials });

      // Creates an asset from the source file and uploads it
      readStream = fs.createReadStream("./Adobe Extract API Sample.pdf");
      const inputAsset = await pdfServices.upload({
        readStream,
        mimeType: MimeType.PDF,
      });

      // Create parameters for the job
      const params = new ExtractPDFParams({
        elementsToExtract: [ExtractElementType.TEXT],
      });

      // Creates a new job instance
      const job = new ExtractPDFJob({ inputAsset, params });

      // Submit the job and get the job result
      const pollingURL = await pdfServices.submit({ job });
      const pdfServicesResponse = await pdfServices.getJobResult({
        pollingURL,
        resultType: ExtractPDFResult,
      });

      // Get content from the resulting asset
      const resultAsset = pdfServicesResponse.result.resource;
      const streamAsset = await pdfServices.getContent({ asset: resultAsset });

      // Copy the asset's content to disk
      const outputFilePath = "./ExtractTextInfoFromPDF.zip";
      console.log(`Saving asset at ${outputFilePath}`);

      const writeStream = fs.createWriteStream(outputFilePath);
      // pipe() returns immediately; wait for the archive to be fully flushed
      // before opening it, otherwise the ZIP is read while still incomplete.
      await new Promise((resolve, reject) => {
        streamAsset.readStream.on("error", reject);
        writeStream.on("error", reject);
        writeStream.on("finish", resolve);
        streamAsset.readStream.pipe(writeStream);
      });

      // Read the structured extraction output and print top-level headings
      const zip = new AdmZip(outputFilePath);
      const jsonData = zip.readAsText("structuredData.json");
      const data = JSON.parse(jsonData);
      for (const element of data.elements) {
        if (element.Path.endsWith("/H1")) {
          console.log(element.Text);
        }
      }
    } catch (err) {
      console.log("Exception encountered while executing operation", err);
    } finally {
      // readStream is still undefined if the failure happened before it was opened.
      readStream?.destroy();
    }
  }
}

export default AdobeExtractAPI;

 

This topic has been closed for replies.

1 reply

Raymond Camden
Community Manager
Community Manager
July 12, 2024

What error?  I don't see one.