.writeToStream on the fileRef returned from extractPDFOperation not working

Report · Aug 16, 2023

I was attempting to read the zip file directly into memory, because I am working in a read-only serverless environment and cannot save the file and then read it back, but the writeToStream method does not work. This works with other functions where PDF files are returned by the SDK (like Split PDF), but not with the zip files returned from extracting text. The documentation clearly states this method should be an option, but it is not in this case.

Exception encountered while executing operation TypeError: result.saveToStream is not a function

Report · Aug 16, 2023

Could you share a bit more of your code?

Report · Aug 16, 2023

ScrapeK1: async function ScrapeK1(data) {

console.log("FX ScrapeK1");

try {

const PDFServicesSdk = require("@adobe/pdfservices-node-sdk");

const path = require("path");

const credentials = await PDFServicesSdk.Credentials.servicePrincipalCredentialsBuilder()

.withClientId(process.env.PDF_SERVICES_CLIENT_ID)

.withClientSecret(process.env.PDF_SERVICES_CLIENT_SECRET)

.build();

const executionContext = PDFServicesSdk.ExecutionContext.create(credentials);

const options = new PDFServicesSdk.ExtractPDF.options.ExtractPdfOptions.Builder()

.addElementsToExtract(PDFServicesSdk.ExtractPDF.options.ExtractElementType.TEXT)

.build();

const extractPDFOperation = PDFServicesSdk.ExtractPDF.Operation.createNew();

const buffer = data.file;

console.log(buffer);

const stream = Readable.from(buffer);

console.log(stream);

const input = PDFServicesSdk.FileRef.createFromStream(stream, "application/pdf");

extractPDFOperation.setInput(input);

extractPDFOperation.setOptions(options);

// Generating a file name

let outputFilePath = createOutputFilePath("/tmp");

const AdmZip = require("adm-zip");

return await extractPDFOperation

.execute(executionContext)

.then(async (result) => {

// Save the zip file -- Right here is the function that cannot use the steam method

await result.saveAsFile(outputFilePath);

const zip = new AdmZip(outputFilePath);

const zipEntries = zip.getEntries();

const structuredDataEntry = zipEntries.find((entry) => entry.entryName === "structuredData.json");

if (!structuredDataEntry) {

console.log("structuredData.json not found in the zip.");

return;

}

const jsonData = structuredDataEntry.getData().toString("utf8");

const parsedData = JSON.parse(jsonData);

console.log("JSON Data:", util.inspect(parsedData, { depth: null }));

return parsedData.elements;

})

.catch((err) => {

if (err instanceof PDFServicesSdk.Error.ServiceApiError || err instanceof PDFServicesSdk.Error.ServiceUsageError) {

console.log("Exception encountered while executing operation", err);

} else {

console.log("Exception encountered while executing operation", err);

}

});

//Generates a string containing a directory structure and file name for the output file.

function createOutputFilePath(directory) {

let date = new Date();

let dateString =

date.getFullYear() +

"-" +

("0" + (date.getMonth() + 1)).slice(-2) +

"-" +

("0" + date.getDate()).slice(-2) +

"T" +

("0" + date.getHours()).slice(-2) +

"-" +

("0" + date.getMinutes()).slice(-2) +

"-" +

("0" + date.getSeconds()).slice(-2);

return path.join(directory, dateString + ".zip");

}

} catch (err) {

console.log("Exception encountered while executing operation", err);

}

},

Report · Aug 16, 2023

If you log out result, is it a FileRef? What object does it appear to be?

Also, if I were doing serverless stuff with our APIs, I'd skip the SDK and just hit the *super* simple REST API direct. Much more control that way.

Report · Aug 16, 2023

Oh, you are calling saveToStream: "result.saveToStream is not a function". Shouldn't it be writeToStream?

Report · Aug 16, 2023

You are correct. I don't know how I missed that after reading it 100 times over.

Report · Aug 17, 2023

No worries, glad you got it.

Report · Feb 08, 2024

Hi Raymond,

I would appreciate it if you could write some sample code for writeToStream, as I could not find any.

I want to write the result (which should be a FileRef) directly to a writable stream, without needing saveAsFile, because I do not have access rights to the file system in the cloud production environment.

Report · Feb 08, 2024

Eh.... I don't do a lot with streams. I have, but just barely. Have you checked the docs on streams? https://nodejs.org/api/stream.html

Report · Feb 09, 2024

Yes, after checking the docs on streams as you suggested, I solved the issue and it works now. Thanks Raymond 👍

Report · Feb 16, 2024

How did you fix it? I'm having the same issue.

Report · Feb 17, 2024

For my application, I have a form on client-side to upload a single PDF file and post it to the server-side endpoint,

then on server-side, return back to the client-side the unzipped CSV files only.

Here below is the server-side code for your reference:

'use strict'

// Small Express server: accepts one uploaded PDF, runs the Adobe PDF Services
// Extract operation on it, and answers with the CSV files found in the
// resulting zip. The zip is kept in memory only; nothing is written to disk
// by this script.

const express = require('express')
const app = express()
const port = 8080
const PDFServicesSdk = require('@adobe/pdfservices-node-sdk')
const fs = require('fs')
const formidable = require('formidable')
const AdmZip = require('adm-zip')
const Stream = require('stream')

app.use('/static', express.static(__dirname + '/public'))

app.get('/', (req, res) => {
  res.sendFile(__dirname + '/public/extract_pdf.html')
})

/**
 * Reads an operation result completely into memory.
 *
 * @param {object} result - the value resolved by `extractPDFOperation.execute`;
 *   only its `writeToStream(writable)` method is used.
 * @returns {Promise<Buffer>} resolves with all bytes written to the stream
 *   once the stream has been ended.
 */
function readResultToBuffer(result) {
  return new Promise((resolve, reject) => {
    const chunks = []
    const sink = new Stream.Writable({
      write(chunk, encoding, callback) {
        chunks.push(chunk)
        callback()
      }
    })
    sink.on('finish', () => resolve(Buffer.concat(chunks)))
    sink.on('error', reject)
    // NOTE(review): this relies on writeToStream ending the stream when the
    // data is complete, as the hand-rolled stream did before. Its return value
    // is not needed, but a rejected promise (if it returns one) is forwarded.
    Promise.resolve(result.writeToStream(sink)).catch(reject)
  })
}

/**
 * Collects the CSV entries of a zip archive.
 *
 * @param {Buffer} zipBuffer - the raw bytes of the zip archive.
 * @returns {Array<{name: string, size_in_bytes: number, content: string}>}
 *   one object per entry whose name contains '.csv'.
 */
function extractCsvFiles(zipBuffer) {
  const zip = new AdmZip(zipBuffer)
  const csvFiles = []
  for (const entry of zip.getEntries()) {
    if (!entry.entryName.includes('.csv')) {
      continue
    }
    const content = entry.getData()
    csvFiles.push({
      name: entry.entryName,
      size_in_bytes: content.length,
      content: content.toString('utf-8')
    })
  }
  return csvFiles
}

/**
 * Runs the Extract PDF operation (text and tables, tables rendered as CSV).
 *
 * @param {Buffer} pdfBytes - the bytes of the uploaded PDF.
 * @returns {Promise<object>} the promise returned by the SDK's `execute`.
 */
function runExtractOperation(pdfBytes) {
  const credentials = PDFServicesSdk.Credentials
    .servicePrincipalCredentialsBuilder()
    .withClientId(process.env.PDF_SERVICES_CLIENT_ID)
    .withClientSecret(process.env.PDF_SERVICES_CLIENT_SECRET)
    .build()
  const executionContext = PDFServicesSdk.ExecutionContext.create(credentials)

  const options = new PDFServicesSdk.ExtractPDF.options.ExtractPdfOptions.Builder()
    .addElementsToExtract(PDFServicesSdk.ExtractPDF.options.ExtractElementType.TEXT, PDFServicesSdk.ExtractPDF.options.ExtractElementType.TABLES)
    .addElementsToExtractRenditions(PDFServicesSdk.ExtractPDF.options.ExtractRenditionsElementType.TABLES)
    .addTableStructureFormat(PDFServicesSdk.ExtractPDF.options.TableStructureType.CSV)
    .build()

  const extractPDFOperation = PDFServicesSdk.ExtractPDF.Operation.createNew()
  const input = PDFServicesSdk.FileRef.createFromStream(
    Stream.Readable.from(pdfBytes),
    PDFServicesSdk.ExtractPDF.SupportedSourceFormat.pdf
  )
  extractPDFOperation.setInput(input)
  extractPDFOperation.setOptions(options)

  return extractPDFOperation.execute(executionContext)
}

app.post('/extract_pdf_to_zip_api', function (req, res, next) {
  const form = new formidable.IncomingForm()

  form.parse(req, (err, fields, files) => {
    if (err) {
      next(err)
      return
    }

    const uploads = files['uploads_file[0]']
    if (!uploads || uploads.length === 0) {
      res.status(400).send('No PDF file was uploaded')
      return
    }

    fs.promises.readFile(uploads[0].filepath)
      .then((pdfBytes) => runExtractOperation(pdfBytes))
      .then((result) => readResultToBuffer(result))
      .then((zipBuffer) => {
        // Sent as a JSON string through res.send (not res.json) so the
        // response Content-Type stays what existing clients already receive.
        res.send(JSON.stringify(extractCsvFiles(zipBuffer)))
      })
      .catch((error) => {
        console.log(error)
        // Hand the failure to Express so the client gets an error response
        // instead of a request that never completes.
        next(error)
      })
  })
})

app.listen(port, () => {
  console.log(`Example app listening on port ${port}`)
})

module.exports = app

.writeToStream on the fileRef returned from extractPDFOperation not working

1 Correct answer