• Global community
    • Language:
      • Deutsch
      • English
      • Español
      • Français
      • Português
  • 日本語コミュニティ
    Dedicated community for Japanese speakers
  • 한국 커뮤니티
    Dedicated community for Korean speakers
Exit
0

.writeToStream on the fileRef returned from extractPDFOperation not working

New Here ,
Aug 16, 2023 Aug 16, 2023

Copy link to clipboard

Copied

I was attempting to read the zip file directly into memory, instead of saving it and then reading it back, because I am working in a read-only serverless environment, but the writeToStream method does not work. This works with other operations where PDF files are returned by the SDK (like Split PDF), but not with the zip files returned from extracting text. The documentation clearly states this method should be an option, but it is not in this case.

Exception encountered while executing operation TypeError: result.saveToStream is not a function

Views

1.0K

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines

correct answers 1 Correct answer

Adobe Employee , Aug 16, 2023 Aug 16, 2023

Oh, you are using saveToStream: "result.saveToStream is not a function". Shouldn't it be writeToStream?

Votes

Translate

Translate
Adobe Employee ,
Aug 16, 2023 Aug 16, 2023

Copy link to clipboard

Copied

Could you share a bit more of your code?

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Aug 16, 2023 Aug 16, 2023

Copy link to clipboard

Copied

 ScrapeK1: async function ScrapeK1(data) {
    console.log("FX ScrapeK1");
    try {
     
      const PDFServicesSdk = require("@adobe/pdfservices-node-sdk");
      const path = require("path");

      const credentials = await PDFServicesSdk.Credentials.servicePrincipalCredentialsBuilder()
        .withClientId(process.env.PDF_SERVICES_CLIENT_ID)
        .withClientSecret(process.env.PDF_SERVICES_CLIENT_SECRET)
        .build();

      const executionContext = PDFServicesSdk.ExecutionContext.create(credentials);

      const options = new PDFServicesSdk.ExtractPDF.options.ExtractPdfOptions.Builder()
        .addElementsToExtract(PDFServicesSdk.ExtractPDF.options.ExtractElementType.TEXT)
        .build();

      const extractPDFOperation = PDFServicesSdk.ExtractPDF.Operation.createNew();

      const buffer = data.file;
      console.log(buffer);

      const stream = Readable.from(buffer);

      console.log(stream);
      const input = PDFServicesSdk.FileRef.createFromStream(stream, "application/pdf");

      extractPDFOperation.setInput(input);

      extractPDFOperation.setOptions(options);

      // Generating a file name
      let outputFilePath = createOutputFilePath("/tmp");

      const AdmZip = require("adm-zip");

      return await extractPDFOperation
        .execute(executionContext)
        .then(async (result) => {
          // Save the zip file -- Right here is the function that cannot use the steam method
          await result.saveAsFile(outputFilePath);

          const zip = new AdmZip(outputFilePath);
          const zipEntries = zip.getEntries();

          const structuredDataEntry = zipEntries.find((entry) => entry.entryName === "structuredData.json");
          if (!structuredDataEntry) {
            console.log("structuredData.json not found in the zip.");
            return;
          }

          const jsonData = structuredDataEntry.getData().toString("utf8");

          const parsedData = JSON.parse(jsonData);

          console.log("JSON Data:", util.inspect(parsedData, { depth: null }));

          return parsedData.elements;
        })
        .catch((err) => {
          if (err instanceof PDFServicesSdk.Error.ServiceApiError || err instanceof PDFServicesSdk.Error.ServiceUsageError) {
            console.log("Exception encountered while executing operation", err);
          } else {
            console.log("Exception encountered while executing operation", err);
          }
        });

      //Generates a string containing a directory structure and file name for the output file.
      function createOutputFilePath(directory) {
        let date = new Date();
        let dateString =
          date.getFullYear() +
          "-" +
          ("0" + (date.getMonth() + 1)).slice(-2) +
          "-" +
          ("0" + date.getDate()).slice(-2) +
          "T" +
          ("0" + date.getHours()).slice(-2) +
          "-" +
          ("0" + date.getMinutes()).slice(-2) +
          "-" +
          ("0" + date.getSeconds()).slice(-2);
        return path.join(directory, dateString + ".zip");
      }
    } catch (err) {
      console.log("Exception encountered while executing operation", err);
    }
  },

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Aug 16, 2023 Aug 16, 2023

Copy link to clipboard

Copied

If you log result to the console, is it a FileRef? What object does it appear to be?

 

Also, if I were doing serverless stuff with our APIs, I'd skip the SDK and just hit the *super* simple REST API direct. Much more control that way.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Aug 16, 2023 Aug 16, 2023

Copy link to clipboard

Copied

Oh, you are using saveToStream: "result.saveToStream is not a function". Shouldn't it be writeToStream?

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Aug 16, 2023 Aug 16, 2023

Copy link to clipboard

Copied

You are correct. I don't know how I missed that after reading it 100 times over.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Aug 17, 2023 Aug 17, 2023

Copy link to clipboard

Copied

No worries, glad you got it.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 08, 2024 Feb 08, 2024

Copy link to clipboard

Copied

Hi Raymond, 

I would appreciate it if you could share some code for writeToStream, as I could not find any sample code for it.

I want to write the result (which should be a FileRef) directly to a writable stream, without calling saveAsFile, because I do not have write access to the file system in the cloud production environment.

 

 

 

 

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Feb 08, 2024 Feb 08, 2024

Copy link to clipboard

Copied

Eh.... I don't do a lot with streams. I have, but just barely. Have you checked the docs on streams? https://nodejs.org/api/stream.html

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 09, 2024 Feb 09, 2024

Copy link to clipboard

Copied

Yes, after checking the docs on streams as you suggested, I solved the issue and it works now. Thanks Raymond 👍

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 16, 2024 Feb 16, 2024

Copy link to clipboard

Copied

How did you fix it? Having the same issue?

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 17, 2024 Feb 17, 2024

Copy link to clipboard

Copied

LATEST

For my application, I have a form on client-side to upload a single PDF file and post it to the server-side endpoint,

then on server-side, return back to the client-side the unzipped CSV files only.

Below is the server-side code for your reference:

 

'use strict'
const express = require('express')
const app = express()
const port = 8080
const PDFServicesSdk = require('@adobe/pdfservices-node-sdk')
const fs = require('fs')
const formidable = require('formidable')
const AdmZip = require('adm-zip')
const Stream = require('stream')

app.use('/static', express.static(__dirname + '/public'))

// Serve the upload form.
app.get('/', (req, res) => {
  res.sendFile(__dirname + '/public/extract_pdf.html')
})

/**
 * Drains an SDK result into memory via result.writeToStream().
 *
 * @param {object} result - Object returned by extractPDFOperation.execute();
 *   only its writeToStream(writable) method is used.
 * @returns {Promise<Buffer>} Resolves with everything written to the sink once
 *   it finishes; rejects if the sink emits an error.
 */
function collectResultBuffer(result) {
  return new Promise((resolve, reject) => {
    const chunks = []
    const sink = new Stream.Writable({
      write(chunk, _encoding, callback) {
        chunks.push(chunk)
        callback()
      },
    })
    sink.on('finish', () => resolve(Buffer.concat(chunks)))
    sink.on('error', reject)
    result.writeToStream(sink)
  })
}

/**
 * Lists the CSV files contained in a zip archive held in memory.
 *
 * @param {Buffer} zipBuffer - Raw bytes of the zip archive.
 * @returns {Array<{name: string, size_in_bytes: number, content: string}>}
 *   One record per entry whose name contains ".csv".
 */
function extractCsvEntries(zipBuffer) {
  const csvEntries = []
  for (const entry of new AdmZip(zipBuffer).getEntries()) {
    if (!entry.entryName.includes('.csv')) {
      continue
    }
    // Only decompress the entries that are actually returned.
    const content = entry.getData()
    csvEntries.push({
      name: entry.entryName,
      size_in_bytes: content.length,
      content: content.toString('utf-8'),
    })
  }
  return csvEntries
}

/**
 * POST /extract_pdf_to_zip_api
 *
 * Accepts a multipart upload with the PDF in the "uploads_file[0]" field, runs
 * the Extract PDF operation (text + tables, tables rendered as CSV) and
 * responds with a JSON array describing the CSV files found in the result zip.
 * Responds 400 when no file was uploaded and 500 when extraction fails.
 */
app.post('/extract_pdf_to_zip_api', function (req, res, next) {
  const form = new formidable.IncomingForm()
  form.parse(req, async (err, fields, files) => {
    if (err) {
      return next(err)
    }

    const uploads = files['uploads_file[0]']
    const file = uploads && uploads[0]
    if (!file) {
      return res.status(400).json({ error: 'No PDF file uploaded' })
    }

    try {
      const data = await fs.promises.readFile(file.filepath)
      const stream = Stream.Readable.from(data)

      const credentials = PDFServicesSdk.Credentials
        .servicePrincipalCredentialsBuilder()
        .withClientId(process.env.PDF_SERVICES_CLIENT_ID)
        .withClientSecret(process.env.PDF_SERVICES_CLIENT_SECRET)
        .build()
      const executionContext = PDFServicesSdk.ExecutionContext.create(credentials)

      const options = new PDFServicesSdk.ExtractPDF.options.ExtractPdfOptions.Builder()
        .addElementsToExtract(PDFServicesSdk.ExtractPDF.options.ExtractElementType.TEXT, PDFServicesSdk.ExtractPDF.options.ExtractElementType.TABLES)
        .addElementsToExtractRenditions(PDFServicesSdk.ExtractPDF.options.ExtractRenditionsElementType.TABLES)
        .addTableStructureFormat(PDFServicesSdk.ExtractPDF.options.TableStructureType.CSV)
        .build()

      const extractPDFOperation = PDFServicesSdk.ExtractPDF.Operation.createNew()
      const input = PDFServicesSdk.FileRef.createFromStream(stream, PDFServicesSdk.ExtractPDF.SupportedSourceFormat.pdf)
      extractPDFOperation.setInput(input)
      extractPDFOperation.setOptions(options)

      const result = await extractPDFOperation.execute(executionContext)
      const zipBuffer = await collectResultBuffer(result)

      res.send(JSON.stringify(extractCsvEntries(zipBuffer)))
    } catch (error) {
      console.log(error)
      // Always answer the request so the client is not left hanging.
      if (!res.headersSent) {
        res.status(500).json({ error: 'Failed to extract CSV data from the PDF' })
      }
    }
  })
})

app.listen(port, () => {
  console.log(`Example app listening on port ${port}`)
})

module.exports = app

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Resources