• Global community
    • Language:
      • Deutsch
      • English
      • Español
      • Français
      • Português
  • 日本語コミュニティ
    Dedicated community for Japanese speakers
  • 한국 커뮤니티
    Dedicated community for Korean speakers
Exit
0

"BAD_PDF_FILE_TYPE" status for any PDF file

New Here ,
Jul 05, 2024 Jul 05, 2024

Copy link to clipboard

Copied

I am using Google Apps Script to run the Adobe PDF Extract API, but the job always ends with the status 'failed'. Please see the details posted below.

/**
 * Runs the whole extract flow for one file blob: verifies the active user,
 * requests a pre-signed upload URI, uploads the blob, starts the extraction
 * via pdfExtractFn, and saves the downloaded result into a newly created
 * Drive subfolder.
 *
 * Does nothing when apiExecutableVerifyUser returns a falsy value.
 *
 * @param {*} fileBlob - Blob handed unchanged to apiExecutableUploadFileToCloud.
 */
function pdfExtractUmbrella(fileBlob) {
const username = Session.getActiveUser().getEmail();
Logger.log("username: " + username);
const user = apiExecutableVerifyUser(username); // user json
Logger.log("user json inside pdfExtractUmbrella: " + JSON.stringify(user));

if (user) {
// NOTE(review): the lines below are elided in the posted listing. content_type,
// inner_folder_name and landingFileName are used further down but are not
// defined in the visible code — presumably they are set here; confirm.
..........
..........
..........
..........
// Step 1: obtain the pre-signed upload URI and the asset ID.
const preSignedUriResponse = apiExecutableGetUploadPresignedUri(content_type);
Logger.log('preSignedUriResponse: ' + JSON.stringify(preSignedUriResponse));
const uploadUri = preSignedUriResponse.uploadUri;
const assetID = preSignedUriResponse.assetID;
// Step 2: upload the blob to the pre-signed URI. The returned status is only
// logged; it is not checked before the extract job is started.
const uploadStatus = apiExecutableUploadFileToCloud(uploadUri, fileBlob, content_type);
Logger.log('uploadStatus: ' + uploadStatus);
// Step 3: start the extract job for the asset; pdfExtractFn returns the job's
// content object or throws.
const content_json = pdfExtractFn(assetID);
Logger.log("content_json stringified inside pdfExtractUmbrella = " + JSON.stringify(content_json));
const contentDownloadUri = content_json.content.downloadUri;
Logger.log("contentDownloadUri = " + contentDownloadUri);
// Step 4: create a subfolder under the configured results folder and save the
// downloaded result there.
const extracted_pdf_contents_folder_id = globalVariables().extracted_pdf_contents_folder_id;
const extracted_pdf_contents_folder = DriveApp.getFolderById(extracted_pdf_contents_folder_id);
const this_pdf_extract_folder = extracted_pdf_contents_folder.createFolder(inner_folder_name);
const this_pdf_extract_folder_id = this_pdf_extract_folder.getId();
downloadAndSaveFile(contentDownloadUri, this_pdf_extract_folder_id, landingFileName);
}
}

/**
 * Creates an Adobe PDF Services "extractpdf" job for an already-uploaded asset
 * and returns the result of polling that job.
 *
 * The job is created with a POST to the extractpdf endpoint. On HTTP 201 the
 * job ID is parsed out of the Location response header and handed to
 * jobStatusFn together with the access token.
 *
 * @param {string} assetID - Asset ID sent as "assetID" in the request payload.
 * @returns {*} Whatever jobStatusFn returns for the created job.
 * @throws {Error} If the response code is not 201, if no Location header is
 *     present, or if no job ID can be parsed from the Location header.
 */
function pdfExtractFn(assetID) {
  const accessToken = apiExecutableGetAccessToken();
  const url_pdfExtractFn = "https://pdf-services-ue1.adobe.io/operation/extractpdf";
  const payload = {
    "assetID": assetID,
    "getCharBounds": false,
    "includeStyling": false,
    "elementsToExtract": [
      "text"
    ],
    "includeHeaderFooter": true,
    "tagEncapsulatedText": [
      "Figure"
    ],
    "notifiers": []
  };

  const options = {
    "method": "post",
    "headers": {
      "Authorization": `Bearer ${accessToken}`,
      "X-API-Key": globalVariables().clientId,
      "Content-Type": "application/json"
    },
    "payload": JSON.stringify(payload),
    // Keep 4xx/5xx as ordinary responses so the status check below can report
    // the server's error body instead of UrlFetchApp throwing first.
    "muteHttpExceptions": true
  };

  Logger.log("Request payload: " + JSON.stringify(payload));
  // Never write the bearer token to the execution log.
  const loggableHeaders = Object.assign({}, options.headers, { "Authorization": "Bearer [REDACTED]" });
  Logger.log("Request headers: " + JSON.stringify(loggableHeaders));

  const pdfExtractFn_response = UrlFetchApp.fetch(url_pdfExtractFn, options);
  const responseCode = pdfExtractFn_response.getResponseCode();
  const responseText = pdfExtractFn_response.getContentText();
  const headers = pdfExtractFn_response.getHeaders();

  Logger.log("pdfExtractFn_response.getResponseCode() = " + responseCode);
  Logger.log("pdfExtractFn_response.getContentText() = " + responseText);
  Logger.log("Response headers: " + JSON.stringify(headers));

  if (responseCode !== 201) {
    throw new Error(`Error creating extract PDF job (HTTP ${responseCode}): ${responseText}`);
  }

  // Some servers may use 'location' instead of 'Location'.
  const location = headers.Location || headers.location;
  if (!location) {
    throw new Error("Location header not found in the response.");
  }

  // The Location header has the form .../extractpdf/<jobId>/status.
  const jobIdMatch = /extractpdf\/([^\/]*)\/status$/.exec(location);
  const jobId = jobIdMatch ? jobIdMatch[1] : undefined;
  if (!jobId) {
    throw new Error("Job ID could not be extracted from the Location header.");
  }

  Logger.log(`Job ID: ${jobId}`);
  return jobStatusFn(jobId, accessToken);
}

/**
 * Polls the status endpoint of an extractpdf job until the job leaves the
 * "in progress" state, then returns its content or throws.
 *
 * @param {string} jobId - Job ID inserted into the status URL.
 * @param {string} accessToken - Token sent as the Bearer Authorization header.
 * @returns {{content: *}} Object holding the "content" field of the final
 *     status response when the status is "done".
 * @throws {Error} If the final status is anything other than "done". The
 *     message includes the error code and message reported by the service
 *     when they are present.
 */
function jobStatusFn(jobId, accessToken) {
  const url_jobStatusFn = `https://pdf-services-ue1.adobe.io/operation/extractpdf/${jobId}/status`;
  const options = {
    "method": "get",
    "headers": {
      "Authorization": `Bearer ${accessToken}`,
      "X-API-Key": globalVariables().clientId
    }
  };
  const pollIntervalMs = 10000; // 10 seconds between checks

  let data;
  for (;;) {
    const responseText = UrlFetchApp.fetch(url_jobStatusFn, options).getContentText();
    Logger.log("Job status response: " + responseText);
    data = JSON.parse(responseText);
    Logger.log(`Job status: ${data.status}`);
    if (data.status !== "in progress") {
      break;
    }
    // Only wait when another poll is actually needed; sleeping after a
    // terminal status just delays the result.
    Utilities.sleep(pollIntervalMs);
  }

  if (data.status === "done") {
    const content_json = {
      content: data.content,
    };
    Logger.log("content_json stringified = " + JSON.stringify(content_json));
    return content_json;
  }

  // Failed jobs report details as {"error": {"code": ..., "message": ...}};
  // the top-level errorMessage field is kept as a fallback.
  const errorInfo = data.error || {};
  const rawMessage = errorInfo.message || data.errorMessage || "Unknown error";
  const errorMessage = errorInfo.code ? `${errorInfo.code}: ${rawMessage}` : rawMessage;
  throw new Error(`Error: Job failed with status: ${data.status}, message: ${errorMessage}`);
}


/**
 * Manual test driver: loads a hard-coded Drive file as a blob and passes it
 * to pdfExtractUmbrella.
 */
function checkPdfExtractUmbrella() {
  const sourceBlob = DriveApp.getFileById("xxxxxxxxxxxxxxxxxxxxxxxx").getBlob();
  pdfExtractUmbrella(sourceBlob);
}


/**
 * Manual test driver: calls pdfExtractFn with a hard-coded asset ID and logs
 * the message of any error it throws instead of propagating it.
 */
function checkPdfExtract() {
  try {
    pdfExtractFn("urn:aaid:AS:UE1:c35d4125-62fc-45e7-bd6f-449cb6448a16");
  } catch (failure) {
    const logLine = "Error in checkPdfExtract: " + failure.message;
    Logger.log(logLine);
  }
}






/**
 * Fetches the resource at a URI and stores it as a named file in a Drive
 * folder.
 *
 * @param {string} downloadUri - URI passed to UrlFetchApp.fetch.
 * @param {string} folderId - ID of the Drive folder that receives the file.
 * @param {string} fileName - Name set on the blob before the file is created.
 * @returns {*} The file object returned by the folder's createFile call.
 */
function downloadAndSaveFile(downloadUri, folderId, fileName) {
  // Download first, then resolve the destination folder.
  const downloadedBlob = UrlFetchApp.fetch(downloadUri).getBlob();
  const targetFolder = DriveApp.getFolderById(folderId);

  const namedBlob = downloadedBlob.setName(fileName);
  return targetFolder.createFile(namedBlob);
}


When I execute 'checkPdfExtractUmbrella()', I get the following output:
Job status response: {"error":{"code":"BAD_PDF_FILE_TYPE","message":"BAD_PDF - Unable to extract content.: The input file is not a PDF file","status":400},"status":"failed"}
2:11:14 AM Info Job status: failed


Why am I getting the "BAD_PDF_FILE_TYPE" status for every PDF file that I upload?

----------------------------------------------------------

2:10:37 AM Notice Execution started
2:10:37 AM Info username: apurb@realtimeinfra.in
2:10:42 AM Info API Response Code: 200
2:10:57 AM Info Data stringified: {"uploadUri":"https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/vouoyvph;bjgyvfljb","assetID":"urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6"}
2:10:57 AM Info Data.uploadUri: https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/vouoyvph;bjgyvfljb
2:10:57 AM Info Data.assetID: urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6
2:10:57 AM Info preSignedUriResponse: {"uploadUri":"https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/vouoyvph;bjgyvfljb","assetID":"urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6"}
2:10:59 AM Info response: {
"done": true,
"response": {
"@type": "type.googleapis.com/google.apps.script.v1.ExecutionResponse"
}
}
2:10:59 AM Info result: [object Object]
2:10:59 AM Info Success: file_uploaded
2:10:59 AM Info uploadStatus: file_uploaded
2:11:02 AM Info accessToken: awdpdossdpvosadvknasdv
2:11:02 AM Info Request payload: {"assetID":"urn:aaid:AS:UE1:41e6fbbf-befd-49a2-aa3c-f1369ff054c6","getCharBounds":false,"includeStyling":false,"elementsToExtract":["text"],"includeHeaderFooter":true,"tagEncapsulatedText":["Figure"],"notifiers":[]}
2:11:02 AM Info Request headers: {"Authorization":"Bearer awdpdossdpvosadvknasdv","X-API-Key":";iug;ugogssdpvsgdv;soddivhsdvoihisdv","Content-Type":"application/json"}
2:11:03 AM Info pdfExtractFn_response: {}
2:11:03 AM Info pdfExtractFn_response.getResponseCode() = 201
2:11:03 AM Info pdfExtractFn_response.getContentText() =
2:11:03 AM Info pdfExtractFn_response.getContentText() stringify = ""
2:11:03 AM Info Response headers: {"Access-Control-Max-Age":"60","Connection":"keep-alive","Access-Control-Allow-Methods":"GET, POST, PUT, DELETE, OPTIONS","Retry-After":"1","Access-Control-Allow-Credentials":"true","Access-Control-Expose-Headers":"*","Date":"Fri, 05 Jul 2024 20:41:03 GMT","Access-Control-Allow-Headers":"Authorization,Content-Type,X-Api-Key,User-Agent,If-Modified-Since,x-api-app-info","Transfer-Encoding":"chunked","X-Request-ID":"8DN5gsLUmG0ChkvkYkeC3RL1kyaZ62D5","Location":"https://pdf-services-ue1.adobe.io/operation/extractpdf/8DN5gsLUmG0ChkvkYkeC3RL1kyaZ62D5/status","Server":"openresty","Access-Control-Allow-Origin":"*"}
2:11:03 AM Info Job ID: 8DN5gsLUmG0ChkvkYkeC3RL1kyaZ62D5
2:11:04 AM Info Job status response: {"status":"in progress"}
2:11:04 AM Info Job status: in progress
2:11:14 AM Info Job status response: {"error":{"code":"BAD_PDF_FILE_TYPE","message":"BAD_PDF - Unable to extract content.: The input file is not a PDF file","status":400},"status":"failed"}
2:11:14 AM Info Job status: failed
2:11:24 AM Error
Error: Error: Job failed with status: failed, message: Unknown error
jobStatusFn @ Code.gs:156
pdfExtractFn @ Code.gs:114
pdfExtractUmbrella @ Code.gs:50
checkPdfExtractUmbrella @ Code.gs:165

Views

39

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
no replies

Have something to add?

Join the conversation
Resources