Exit
  • Global community
    • Language:
      • Deutsch
      • English
      • Español
      • Français
      • Português
  • 日本語コミュニティ
  • 한국 커뮤니티
0

.writeToStream on the fileRef returned from extractPDFOperation not working

New Here ,
Aug 16, 2023 Aug 16, 2023

I was attempting to read the zip file directly into memory, instead of saving it to disk and then reading it back, because I am working in a read-only serverless environment, and the writeToStream method does not work. It works with other operations where PDF files are returned by the SDK (like Split PDF), but not with the zip files returned from extracting text. The documentation clearly states this method should be an option, but it is not in this case.

Exception encountered while executing operation TypeError: result.saveToStream is not a function

1.3K
Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines

correct answers 1 Correct answer

Adobe Employee , Aug 16, 2023 Aug 16, 2023

Oh, you are calling saveToStream: "result.saveToStream is not a function". Shouldn't it be writeToStream?

Translate
Adobe Employee ,
Aug 16, 2023 Aug 16, 2023

Could you share a bit more of your code?

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Aug 16, 2023 Aug 16, 2023
 ScrapeK1: async function ScrapeK1(data) {
    console.log("FX ScrapeK1");
    try {
     
      const PDFServicesSdk = require("@adobe/pdfservices-node-sdk");
      const path = require("path");

      const credentials = await PDFServicesSdk.Credentials.servicePrincipalCredentialsBuilder()
        .withClientId(process.env.PDF_SERVICES_CLIENT_ID)
        .withClientSecret(process.env.PDF_SERVICES_CLIENT_SECRET)
        .build();

      const executionContext = PDFServicesSdk.ExecutionContext.create(credentials);

      const options = new PDFServicesSdk.ExtractPDF.options.ExtractPdfOptions.Builder()
        .addElementsToExtract(PDFServicesSdk.ExtractPDF.options.ExtractElementType.TEXT)
        .build();

      const extractPDFOperation = PDFServicesSdk.ExtractPDF.Operation.createNew();

      const buffer = data.file;
      console.log(buffer);

      const stream = Readable.from(buffer);

      console.log(stream);
      const input = PDFServicesSdk.FileRef.createFromStream(stream, "application/pdf");

      extractPDFOperation.setInput(input);

      extractPDFOperation.setOptions(options);

      // Generating a file name
      let outputFilePath = createOutputFilePath("/tmp");

      const AdmZip = require("adm-zip");

      return await extractPDFOperation
        .execute(executionContext)
        .then(async (result) => {
          // Save the zip file -- Right here is the function that cannot use the steam method
          await result.saveAsFile(outputFilePath);

          const zip = new AdmZip(outputFilePath);
          const zipEntries = zip.getEntries();

          const structuredDataEntry = zipEntries.find((entry) => entry.entryName === "structuredData.json");
          if (!structuredDataEntry) {
            console.log("structuredData.json not found in the zip.");
            return;
          }

          const jsonData = structuredDataEntry.getData().toString("utf8");

          const parsedData = JSON.parse(jsonData);

          console.log("JSON Data:", util.inspect(parsedData, { depth: null }));

          return parsedData.elements;
        })
        .catch((err) => {
          if (err instanceof PDFServicesSdk.Error.ServiceApiError || err instanceof PDFServicesSdk.Error.ServiceUsageError) {
            console.log("Exception encountered while executing operation", err);
          } else {
            console.log("Exception encountered while executing operation", err);
          }
        });

      //Generates a string containing a directory structure and file name for the output file.
      function createOutputFilePath(directory) {
        let date = new Date();
        let dateString =
          date.getFullYear() +
          "-" +
          ("0" + (date.getMonth() + 1)).slice(-2) +
          "-" +
          ("0" + date.getDate()).slice(-2) +
          "T" +
          ("0" + date.getHours()).slice(-2) +
          "-" +
          ("0" + date.getMinutes()).slice(-2) +
          "-" +
          ("0" + date.getSeconds()).slice(-2);
        return path.join(directory, dateString + ".zip");
      }
    } catch (err) {
      console.log("Exception encountered while executing operation", err);
    }
  },
Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Aug 16, 2023 Aug 16, 2023

If you log out result, is it a FileRef? What object does it appear to be?

 

Also, if I were doing serverless stuff with our APIs, I'd skip the SDK and just hit the *super* simple REST API direct. Much more control that way.

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Aug 16, 2023 Aug 16, 2023

Oh, you are calling saveToStream: "result.saveToStream is not a function". Shouldn't it be writeToStream?

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Aug 16, 2023 Aug 16, 2023

You are correct. I don't know how I missed that after reading it 100 times over.

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Aug 17, 2023 Aug 17, 2023

No worries, glad you got it.

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 08, 2024 Feb 08, 2024

Hi Raymond, 

I would appreciate it if you could share some sample code for writeToStream, as I could not find any.

I want to write the result (which should be a FileRef) directly to a writable stream, without calling saveAsFile, because I do not have write access to the file system in the cloud production environment.

 

 

 

 

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Adobe Employee ,
Feb 08, 2024 Feb 08, 2024

Eh.... I don't do a lot with streams. I have, but just barely. Have you checked the docs on streams? https://nodejs.org/api/stream.html

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 09, 2024 Feb 09, 2024

Yes, after checking the docs on streams as you suggested, I solved the issue and it works now. Thanks Raymond 👍

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 16, 2024 Feb 16, 2024

How did you fix it? I'm having the same issue.

Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
New Here ,
Feb 17, 2024 Feb 17, 2024
LATEST

For my application, I have a form on client-side to upload a single PDF file and post it to the server-side endpoint,

then on server-side, return back to the client-side the unzipped CSV files only.

Below is the server-side code for your reference:

 

'use strict'
const express = require('express')
const app = express()
const port = 8080
const PDFServicesSdk = require('@adobe/pdfservices-node-sdk')
const fs = require('fs')
const formidable = require('formidable')
const AdmZip = require('adm-zip')
const Stream = require('stream')

// Expose everything under ./public at the /static URL prefix.
app.use('/static', express.static(`${__dirname}/public`))

// The root URL serves the upload form page.
app.get('/', function (req, res) {
  res.sendFile(`${__dirname}/public/extract_pdf.html`)
})

/**
 * Reads a zip archive held in memory and returns its CSV entries.
 *
 * @param {Buffer} zipBuffer - Complete contents of the zip archive.
 * @returns {Array<{name: string, size_in_bytes: number, content: string}>}
 *   One object per entry whose name contains ".csv", with the entry name,
 *   its uncompressed size in bytes and its content decoded as UTF-8.
 */
function collectCsvEntries(zipBuffer) {
  const zip = new AdmZip(zipBuffer)
  return zip
    .getEntries()
    .filter((entry) => entry.entryName.includes('.csv'))
    .map((entry) => {
      const buffer = entry.getData()
      return {
        name: entry.entryName,
        size_in_bytes: buffer.length,
        content: buffer.toString('utf-8')
      }
    })
}

/**
 * POST /extract_pdf_to_zip_api
 *
 * Accepts a multipart form upload with the PDF in the "uploads_file[0]"
 * field, runs the Extract PDF operation (text + tables, tables rendered as
 * CSV) and responds with a JSON string describing the CSV files found in the
 * resulting zip. The zip is collected in memory through `writeToStream`, so
 * nothing is written to the local file system by this handler.
 *
 * Failures are forwarded to Express via `next(err)` so the client always
 * receives a response instead of waiting on a request that never completes.
 */
app.post('/extract_pdf_to_zip_api', function (req, res, next) {
  const form = new formidable.IncomingForm()
  form.parse(req, (err, fields, files) => {
    if (err) {
      // Stop here: `files` cannot be trusted when parsing failed.
      return next(err)
    }

    const uploads = files['uploads_file[0]']
    const file = Array.isArray(uploads) ? uploads[0] : undefined
    if (!file) {
      return res.status(400).send('No PDF file was uploaded.')
    }

    try {
      const data = fs.readFileSync(file.filepath)
      const stream = Stream.Readable.from(data)

      const credentials = PDFServicesSdk.Credentials
        .servicePrincipalCredentialsBuilder()
        .withClientId(process.env.PDF_SERVICES_CLIENT_ID)
        .withClientSecret(process.env.PDF_SERVICES_CLIENT_SECRET)
        .build()
      const executionContext = PDFServicesSdk.ExecutionContext.create(credentials)

      const extractTypes = PDFServicesSdk.ExtractPDF.options
      const options = new extractTypes.ExtractPdfOptions.Builder()
        .addElementsToExtract(extractTypes.ExtractElementType.TEXT, extractTypes.ExtractElementType.TABLES)
        .addElementsToExtractRenditions(extractTypes.ExtractRenditionsElementType.TABLES)
        .addTableStructureFormat(extractTypes.TableStructureType.CSV)
        .build()

      const extractPDFOperation = PDFServicesSdk.ExtractPDF.Operation.createNew()
      const input = PDFServicesSdk.FileRef.createFromStream(stream, PDFServicesSdk.ExtractPDF.SupportedSourceFormat.pdf)
      extractPDFOperation.setInput(input)
      extractPDFOperation.setOptions(options)

      extractPDFOperation
        .execute(executionContext)
        .then((result) => {
          // In-memory sink: gather every chunk of the zip, then unpack it
          // once the SDK has finished writing.
          const chunks = []
          const sink = new Stream.Writable({
            write(chunk, encoding, callback) {
              chunks.push(chunk)
              callback()
            }
          })
          sink.on('finish', () => {
            try {
              const csv_json = JSON.stringify(collectCsvEntries(Buffer.concat(chunks)))
              res.send(csv_json)
            } catch (zipErr) {
              // A corrupt or unexpected archive must still produce a response.
              next(zipErr)
            }
          })
          sink.on('error', next)
          result.writeToStream(sink)
        })
        .catch((opErr) => {
          console.log(opErr)
          next(opErr)
        })
    } catch (setupErr) {
      // Synchronous failures (unreadable upload, missing credentials, ...).
      next(setupErr)
    }
  })
})

// Start accepting HTTP connections and announce the port once listening.
app.listen(port, function onListening() {
  console.log(`Example app listening on port ${port}`)
})

// Export the Express app so other modules can require it.
module.exports = app
Translate
Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Resources