Copy link to clipboard
Copied
I am google script to run AdobePDF Extract API. But the job is getting 'failed'. please see the detail posted below.
function pdfExtractUmbrella(fileBlob) {
const username = Session.getActiveUser().getEmail();
Logger.log("username: " + username);
const user = apiExecutableVerifyUser(username); // user json
Logger.log("user json inside pdfExtractUmbrella: " + JSON.stringify(user));
if (user) {
..........
..........
..........
..........
const preSignedUriResponse = apiExecutableGetUploadPresignedUri(content_type);
Logger.log('preSignedUriResponse: ' + JSON.stringify(preSignedUriResponse));
const uploadUri = preSignedUriResponse.uploadUri;
const assetID = preSignedUriResponse.assetID;
const uploadStatus = apiExecutableUploadFileToCloud(uploadUri, fileBlob, content_type);
Logger.log('uploadStatus: ' + uploadStatus);
const content_json = pdfExtractFn(assetID);
Logger.log("content_json stringified inside pdfExtractUmbrella = " + JSON.stringify(content_json));
const contentDownloadUri = content_json.content.downloadUri;
Logger.log("contentDownloadUri = " + contentDownloadUri);
const extracted_pdf_contents_folder_id = globalVariables().extracted_pdf_contents_folder_id;
const extracted_pdf_contents_folder = DriveApp.getFolderById(extracted_pdf_contents_folder_id);
const this_pdf_extract_folder = extracted_pdf_contents_folder.createFolder(inner_folder_name);
const this_pdf_extract_folder_id = this_pdf_extract_folder.getId();
downloadAndSaveFile(contentDownloadUri, this_pdf_extract_folder_id, landingFileName);
}
}
function pdfExtractFn(assetID) {
const accessToken = apiExecutableGetAccessToken();
Logger.log("accessToken: " + accessToken);
const url_pdfExtractFn = "https://pdf-services-ue1.adobe.io/operation/extractpdf";
const payload = {
"assetID": assetID,
"getCharBounds": false,
"includeStyling": false,
"elementsToExtract": [
"text"
],
"includeHeaderFooter": true,
"tagEncapsulatedText": [
"Figure"
],
"notifiers": []
};
const options = {
"method": "post",
"headers": {
"Authorization": `Bearer ${accessToken}`,
"X-API-Key": globalVariables().clientId,
"Content-Type": "application/json"
},
"payload": JSON.stringify(payload),
"muteHttpExceptions": false
};
Logger.log("Request payload: " + JSON.stringify(payload));
Logger.log("Request headers: " + JSON.stringify(options.headers));
const pdfExtractFn_response = UrlFetchApp.fetch(url_pdfExtractFn, options);
Logger.log("pdfExtractFn_response: " + JSON.stringify(pdfExtractFn_response));
Logger.log("pdfExtractFn_response.getResponseCode() = " + pdfExtractFn_response.getResponseCode());
Logger.log("pdfExtractFn_response.getContentText() = " + pdfExtractFn_response.getContentText());
Logger.log("pdfExtractFn_response.getContentText() stringify = " + JSON.stringify(pdfExtractFn_response.getContentText()));
// Inspect the headers
const headers = pdfExtractFn_response.getHeaders();
Logger.log("Response headers: " + JSON.stringify(headers));
if (pdfExtractFn_response.getResponseCode() === 201) {
// Check if the job ID is in the headers
const location = headers.Location || headers.location; // Some servers may use 'location' instead of 'Location'
if (location) {
const jobIdMatch = /extractpdf\/([^\/]*)\/status$/.exec(location);
const jobId = jobIdMatch ? jobIdMatch[1] : undefined;
if (jobId) {
Logger.log(`Job ID: ${jobId}`);
return jobStatusFn(jobId, accessToken);
} else {
throw new Error("Job ID could not be extracted from the Location header.");
}
} else {
throw new Error("Location header not found in the response.");
}
} else {
throw new Error(`Error creating extract PDF job: ${pdfExtractFn_response.getContentText()}`);
}
}
function jobStatusFn(jobId, accessToken) {
const url_jobStatusFn = `https://pdf-services-ue1.adobe.io/operation/extractpdf/${jobId}/status`;
const options = {
"method": "get",
"headers": {
"Authorization": `Bearer ${accessToken}`,
"X-API-Key": globalVariables().clientId
}
};
let response;
let jobStatus;
do {
response = UrlFetchApp.fetch(url_jobStatusFn, options);
Logger.log("Job status response: " + response.getContentText());
jobStatus = JSON.parse(response.getContentText()).status;
Logger.log(`Job status: ${jobStatus}`);
Utilities.sleep(10000); // Sleep for 10 seconds between checks
} while (jobStatus === "in progress");
if (jobStatus === "done") {
const data = JSON.parse(response.getContentText());
const content_json = {
content: data.content,
};
Logger.log("content_json stringified = " + JSON.stringify(content_json));
return content_json;
} else {
const errorMessage = JSON.parse(response.getContentText()).errorMessage || "Unknown error";
throw new Error(`Error: Job failed with status: ${jobStatus}, message: ${errorMessage}`);
}
}
function checkPdfExtractUmbrella(){
const fileId = "xxxxxxxxxxxxxxxxxxxxxxxx";
const file = DriveApp.getFileById(fileId);
const fileBlob = file.getBlob();
pdfExtractUmbrella(fileBlob)
}
function checkPdfExtract() {
const assetID = "urn:aaid:AS:UE1:c35d4125-62fc-45e7-bd6f-449cb6448a16";
try {
pdfExtractFn(assetID);
} catch (error) {
Logger.log("Error in checkPdfExtract: " + error.message);
}
}
function downloadAndSaveFile(downloadUri, folderId, fileName) {
const response = UrlFetchApp.fetch(downloadUri);
const blob = response.getBlob();
// Get the Google Drive folder
const folder = DriveApp.getFolderById(folderId);
// Save the file to the folder
const file = folder.createFile(blob.setName(fileName));
return file;
}
when i am executing 'checkPdfExtractUmbrella()', i am getting-
Job status response: {"error":{"code":"BAD_PDF_FILE_TYPE","message":"BAD_PDF - Unable to extract content.: The input file is not a PDF file","status":400},"status":"failed"}
2:11:14 AM Info Job status: failed
why am i getting '"BAD_PDF_FILE_TYPE"' status for any PDF file that I am uploading?
----------------------------------------------------------
2:10:37 AM Notice Execution started
2:10:37 AM Info username: apurb@realtimeinfra.in
2:10:42 AM Info API Response Code: 200
2:10:57 AM Info Data stringified: {"uploadUri":"https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/vouoyvph;bjgyvfljb","assetID":"urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6"}
2:10:57 AM Info Data.uploadUri: https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/vouoyvph;bjgyvfljb
2:10:57 AM Info Data.assetID: urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6
2:10:57 AM Info preSignedUriResponse: {"uploadUri":"https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/vouoyvph;bjgyvfljb","assetID":"urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6"}
2:10:59 AM Info response: {
"done": true,
"response": {
"@type": "type.googleapis.com/google.apps.script.v1.ExecutionResponse"
}
}
2:10:59 AM Info result: [object Object]
2:10:59 AM Info Success: file_uploaded
2:10:59 AM Info uploadStatus: file_uploaded
2:11:02 AM Info accessToken: awdpdossdpvosadvknasdv
2:11:02 AM Info Request payload: {"assetID":"urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6","getCharBounds":false,"includeStyling":false,"elementsToExtract":["text"],"includeHeaderFooter":true,"tagEncapsulatedText":["Figure"],"notifiers":[]}
2:11:02 AM Info Request headers: {"Authorization":"Bearer awdpdossdpvosadvknasdv","X-API-Key":";iug;ugogssdpvsgdv;soddivhsdvoihisdv","Content-Type":"application/json"}
2:11:03 AM Info pdfExtractFn_response: {}
2:11:03 AM Info pdfExtractFn_response.getResponseCode() = 201
2:11:03 AM Info pdfExtractFn_response.getContentText() =
2:11:03 AM Info pdfExtractFn_response.getContentText() stringify = ""
2:11:03 AM Info Response headers: {"Access-Control-Max-Age":"60","Connection":"keep-alive","Access-Control-Allow-Methods":"GET, POST, PUT, DELETE, OPTIONS","Retry-After":"1","Access-Control-Allow-Credentials":"true","Access-Control-Expose-Headers":"*","Date":"Fri, 05 Jul 2024 20:41:03 GMT","Access-Control-Allow-Headers":"Authorization,Content-Type,X-Api-Key,User-Agent,If-Modified-Since,x-api-app-info","Transfer-Encoding":"chunked","X-Request-ID":"8DN5gsLUmG0ChkvkYkeC3RL1kyaZ62D5","Location":"https://pdf-services-ue1.adobe.io/operation/extractpdf/8DN5gsLUmG0ChkvkYkeC3RL1kyaZ62D5/status","Server":"openresty","Access-Control-Allow-Origin":"*"}
2:11:03 AM Info Job ID: 8DN5gsLUmG0ChkvkYkeC3RL1kyaZ62D5
2:11:04 AM Info Job status response: {"status":"in progress"}
2:11:04 AM Info Job status: in progress
2:11:14 AM Info Job status response: {"error":{"code":"BAD_PDF_FILE_TYPE","message":"BAD_PDF - Unable to extract content.: The input file is not a PDF file","status":400},"status":"failed"}
2:11:14 AM Info Job status: failed
2:11:24 AM Error
Error: Error: Job failed with status: failed, message: Unknown error
jobStatusFn @ Code.gs:156
pdfExtractFn @ Code.gs:114
pdfExtractUmbrella @ Code.gs:50
checkPdfExtractUmbrella @ Code.gs:165
Have something to add?
Find more inspiration, events, and resources on the new Adobe Community
Explore Now