Copy link to clipboard
Copied
I want to use the Acrobat Services API to extract text from a pdf.
First 2 steps works fine, but I get error in step 3, extract from pdf.
{"reason":"Validation error Request id: G7ueJEPebxWy6evaToK3LHrih1hyymVC.","message":"Bad Request"}
Have used Postman sample in SDK and Postman php curl code.
The code is on a commercial web site running FreeBSD.
$client_id and $client_secret is set directly before this code.
File apitest.txt follws after code. Any suggestions?
// Step 1 - Get token - from Postman
$fp = fopen("apitest.txt", "w");
$curl = curl_init();
$postfield = "client_id=".$client_id."&client_secret=".$client_secret;
curl_setopt_array($curl, array(
CURLOPT_URL => 'https://pdf-services.adobe.io/token',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 0,
CURLOPT_HEADER => FALSE,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS => $postfield,
CURLOPT_HTTPHEADER => array(
'Content-Type: application/x-www-form-urlencoded'
),
));
$response1 = curl_exec($curl);
fwrite($fp, "Postfield:\n".$postfield);
fwrite($fp, "\nResponse1:\n".$response1);
if(curl_error($curl)) fwrite($fp, "\ncurl error:".curl_error($curl)."\n");
curl_close($curl);
$result = explode("\"", $response1);
$token = $result[3];
// Step 2a - Upload Presigned Uri - from Postman
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => 'https://pdf-services.adobe.io/assets',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 0,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS =>'{
"mediaType":"application/pdf"
}',
CURLOPT_HTTPHEADER => array(
'X-API-Key:'.$client_id,
'Authorization: Bearer '.$token,
'Content-Type: application/json'
),
));
$response2a = curl_exec($curl);
fwrite($fp, "\nResponse2a:\n".$response2a);
if(curl_error($curl)) fwrite($fp, "\ncurl error:".curl_error($curl)."\n");
curl_close($curl);
$result = explode("\"", $response2a);
$uploadUri = $result[3];
$assetID = $result[7];
// Step 2b - Upload Document - from Postman
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => $uploadUri,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 0,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'PUT',
CURLOPT_POSTFIELDS => file_get_contents("Vattenfall.pdf"),
CURLOPT_HTTPHEADER => array(
'Content-Type: application/pdf'
),
));
$response2b = curl_exec($curl);
fwrite($fp, "\nResponse2b:\n".$response2b);
if(curl_error($curl)) fwrite($fp, "\ncurl error:".curl_error($curl)."\n");
curl_close($curl);
// Step 3 - Extract PDF - from Postman
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => 'https://pdf-services.adobe.io/operation/extractpdf/',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 0,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS =>'{
"assetID": $assetID,
"getCharBounds": "false",
"includeStyling": "false",
"elementsToExtract": [
"text",
"tables"
],
"tableOutputFormat": "xlsx",
"renditionsToExtract": [
"tables",
"figures"
]
}',
CURLOPT_HTTPHEADER => array(
'Authorization: Bearer '.$token,
'x-api-key: '.$client_id,
'Content-Type: application/json'
),
));
$response3 = curl_exec($curl);
fwrite($fp, "\nResponse3:\n".$response3);
if(curl_error($curl)) fwrite($fp, "\ncurl error:".curl_error($curl)."\n");
curl_close($curl);
Response1:
{"access_token":"eyJhbGciOiJSUzI1NiIsIng1dSI6Imltc19uYTEta2V5LWF0LTEuY2VyIiwia2lkIjoiaW1zX25hMS1rZXktYXQtMSIsIml0dCI6ImF0In0.eyJpZCI6IjE3MzY5Njc2MDQ1NzNfN2ZjZTY1NWUtNzU1OC00MWFkLTliNzEtYzE1YzU1YzQxZGQxX3VlMSIsIm9yZyI6IjYwMTcxRUQ4Njc4MjU4Q0IwQTQ5NUZCREBBZG9iZU9yZyIsInR5cGUiOiJhY2Nlc3NfdG9rZW4iLCJjbGllbnRfaWQiOiIyZjQzYjljNmEzYjM0YzkwYjBmZDNlYmRlZmRiYzI1NiIsInVzZXJfaWQiOiI2MTk2MUY1RTY3ODI1QjUyMEE0OTVGRDZAdGVjaGFjY3QuYWRvYmUuY29tIiwiYXMiOiJpbXMtbmExIiwiYWFfaWQiOiI2MTk2MUY1RTY3ODI1QjUyMEE0OTVGRDZAdGVjaGFjY3QuYWRvYmUuY29tIiwiY3RwIjozLCJtb2kiOiJhZTc3MmJiIiwiZXhwaXJlc19pbiI6Ijg2NDAwMDAwIiwic2NvcGUiOiJEQ0FQSSxvcGVuaWQsQWRvYmVJRCIsImNyZWF0ZWRfYXQiOiIxNzM2OTY3NjA0NTczIn0.Gzei15AUQV3srIC5v-qgm_qtM3QMka6OoqFK32lAgxsqMPSj-u2aGXNfqSL4LnynNqxE7djszP0gfSArC-Q5diZ-dyJ74UIMJwgmwi2GPBi0nrF0IR0xHAjaDiaH8SYhYqssW4VpZTKul0NDQBPUCmgye63j0eCNQkEghM_MbsZbqkuClMPJ_xMuH9RdBJPCEzenUGeANfc3e18MnD8zUAIn4b-rUAw6jCHK8f-7OvlOMe8ZZbNGnp6LFzXJua8nJ7sKT9lE09YNabjmXrTTb0NRliLzTTR0g583R6BKiTM_IfZIsHXMll8kL1O3dEIEh4twZoymxMfnbtwgk_JzAQ","token_type":"bearer","expires_in":86399}
Response2a:
{"uploadUri":"https://dcplatformstorageservice-prod-us-east-1.s3-accelerate.amazonaws.com/2f43b9c6a3b34c90b0fd3ebd...","assetID":"urn:aaid:AS:UE1:336eafed-1087-4224-936c-7305570bd6f1"}
Response2b:
Response3:
{"reason":"Validation error Request id: G7ueJEPebxWy6evaToK3LHrih1hyymVC.","message":"Bad Request"}
Copy link to clipboard
Copied
This is just too complicated. Maybe it had worked if there was some response or better documentation.
I have found another working solution - Ghostscript using device txtwrite works perfect for me. One simple command gives me what I want.
gs -sDEVICE=txtwrite -sOutputFile=xxx.txt xxx.pdf