Copy link to clipboard
Copied
I'm having an issue with the PDF Extract API. The extraction itself works as expected, but I am not able to get the output as a ZIP file containing all the images and CSV files. What I get instead is a multipart response, and I don't know how to parse it to get the individual parts properly.
I would like to know if I'm doing something wrong or if I'm missing a step in the extraction process.
This is what I'm sending as the contentAnalyzerRequests:
{
"cpf:engine": {
"repo:assetId": "urn:aaid:cpf:58af6e2c-1f0c-400d-9188-078000185695"
},
"cpf:inputs": {
"documentIn": {
"cpf:location": "InputFile0",
"dc:format": "application/pdf"
},
"params": {
"cpf:inline": {
"elementsToExtract": ["text", "tables"],
"renditionsToExtract": [ "tables", "figures"],
"tableOutputFormat": "csv"
}
}
},
"cpf:outputs": {
"elementsInfo": {
"cpf:location": "jsonoutput",
"dc:format": "application/json"
},
"elementsRenditions": {
"cpf:location": "fileoutpart",
"dc:format": "text/directory"
}
}
}
I appreciate your help. Thanks in advance.
Copy link to clipboard
Copied
Hi Adobe Team,
I am using the above-mentioned "contentAnalyzerRequests". I am now able to get the individual files. What changes need to be made to the contentAnalyzerRequest to receive all of the files as a single ZIP file?
Copy link to clipboard
Copied
Hi,
Were you able to parse the multipart response and get the files from it? If so, could you please share how you did it?
Copy link to clipboard
Copied
Hi Amila,
I am using a couple of NuGet packages, "Microsoft.AspNet.WebApi.Client" and "HttpMultipartParser", to achieve this. Here you go:
--------------------------------------------------------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Reflection;
using System.Threading.Tasks;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Net.Http.Formatting;
using HttpMultipartParser;
using Newtonsoft.Json;

namespace DownloadZipFile
{
    /// <summary>
    /// Console sample that fetches the multipart result of a PDF Extract job and
    /// writes every file part of the response to <c>filePath</c>.
    /// </summary>
    public static class Program
    {
        // Directory the extracted parts are written to; assigned in Main.
        private static string filePath;

        private static void Main(string[] args)
        {
            filePath = @"D:\Output";

            // GetAwaiter().GetResult() rethrows the original exception; Wait() would
            // wrap it in an AggregateException and hide the real failure.
            CallWebAPIAsync().GetAwaiter().GetResult();
        }

        /// <summary>
        /// Sends the GET request for the job result and, on success, saves every
        /// file part of the multipart response. On a non-success status the status
        /// code and reason phrase are written to the console.
        /// </summary>
        private static async Task CallWebAPIAsync()
        {
            // Make sure TLS 1.2 is enabled without forcing the deprecated TLS 1.0/1.1
            // on and without discarding protocols that are already enabled.
            ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

            using (var client = new HttpClient())
            {
                client.BaseAddress = new Uri("https://cpf-ue1.adobe.io/ops/id/");
                client.DefaultRequestHeaders.Accept.Clear();
                client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
                client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("text/plain"));
                client.DefaultRequestHeaders.Add("x-api-key", "{Your CLIENT ID}");
                client.DefaultRequestHeaders.Add("Authorization", "Bearer " + "{Your Authorization token}");

                // GET Method
                using (HttpResponseMessage response = await client.GetAsync("{Your x-request-id}"))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.WriteLine(
                            "Request failed: {0} ({1})",
                            (int)response.StatusCode,
                            response.ReasonPhrase);
                        return;
                    }

                    // Await the stream instead of blocking on .Result.
                    using (Stream body = await response.Content.ReadAsStreamAsync())
                    {
                        SaveParts(MultipartFormDataParser.Parse(body));
                    }
                }
            }
        }

        /// <summary>
        /// Writes each file part of the parsed response into <c>filePath</c>. The
        /// first parameter part is read as the JSON status document; its
        /// <c>elementsRenditions</c> entries decide each file's extension.
        /// </summary>
        /// <param name="parser">The parsed multipart response.</param>
        private static void SaveParts(MultipartFormDataParser parser)
        {
            if (parser.Parameters.Count == 0)
            {
                Console.WriteLine("The response did not contain the JSON status part.");
                return;
            }

            Root model = JsonConvert.DeserializeObject<Root>(parser.Parameters[0].Data);

            // Maps a part name (cpf:location) to its declared format (dc:format).
            var formatsByPartName = new Dictionary<string, string>();
            if (model != null && model.CpfOutputs != null && model.CpfOutputs.elementsRenditions != null)
            {
                foreach (ElementsRendition rendition in model.CpfOutputs.elementsRenditions)
                {
                    if (rendition != null && rendition.CpfLocation != null)
                    {
                        // Indexer assignment tolerates a repeated location; Add would throw.
                        formatsByPartName[rendition.CpfLocation] = rendition.DcFormat;
                    }
                }
            }

            Directory.CreateDirectory(filePath);

            int index = 0;
            foreach (var part in parser.Files)
            {
                string format = null;
                if (part.Name != null)
                {
                    formatsByPartName.TryGetValue(part.Name, out format);
                }

                // The part name comes from the server response; keep only its file-name
                // component so it can never point outside the output directory.
                string safeName = Path.GetFileName(part.Name);
                if (string.IsNullOrEmpty(safeName))
                {
                    safeName = "part" + index;
                }

                // Build the path directly: passing filePath through string.Format would
                // break on any '{' or '}' in the directory name.
                string target = Path.Combine(filePath, safeName + "." + ExtensionFor(format));
                File.WriteAllBytes(target, ReadFully(part.Data));
                index++;
            }
        }

        /// <summary>
        /// Maps a <c>dc:format</c> value to a file extension: <c>text/csv</c> gives
        /// "csv", <c>image/png</c> gives "png", anything else (including null) "json".
        /// </summary>
        private static string ExtensionFor(string dcFormat)
        {
            switch (dcFormat)
            {
                case "text/csv":
                    return "csv";
                case "image/png":
                    return "png";
                default:
                    return "json";
            }
        }

        /// <summary>
        /// Reads <paramref name="input"/> from its current position to the end.
        /// </summary>
        /// <param name="input">The stream to drain; it is not closed.</param>
        /// <returns>The bytes that were read.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public static byte[] ReadFully(Stream input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            using (MemoryStream buffer = new MemoryStream())
            {
                input.CopyTo(buffer);
                return buffer.ToArray();
            }
        }
    }
}
----------------------------------------------------------------------------------------------------------------------------------
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace DownloadZipFile
{
    /// <summary>
    /// Top-level JSON document; each property binds to the "cpf:*" key named in
    /// its <c>JsonProperty</c> attribute.
    /// </summary>
    public class Root
    {
        [JsonProperty("cpf:inputs")]
        public CpfInputs CpfInputs { get; set; }

        [JsonProperty("cpf:engine")]
        public CpfEngine CpfEngine { get; set; }

        [JsonProperty("cpf:status")]
        public CpfStatus CpfStatus { get; set; }

        [JsonProperty("cpf:outputs")]
        public CpfOutputs CpfOutputs { get; set; }
    }

    /// <summary>Object bound to "cpf:inputs".</summary>
    public class CpfInputs
    {
        public DocumentIn documentIn { get; set; }

        // '@' is required because "params" is a C# keyword; the JSON key is "params".
        public Params @params { get; set; }
    }

    /// <summary>Location/format pair bound to "documentIn".</summary>
    public class DocumentIn
    {
        [JsonProperty("cpf:location")]
        public string CpfLocation { get; set; }

        [JsonProperty("dc:format")]
        public string DcFormat { get; set; }
    }

    /// <summary>Object bound to "params"; wraps the "cpf:inline" options.</summary>
    public class Params
    {
        [JsonProperty("cpf:inline")]
        public CpfInline CpfInline { get; set; }
    }

    /// <summary>Extraction options bound to "cpf:inline".</summary>
    public class CpfInline
    {
        public List<string> elementsToExtract { get; set; }

        public List<string> renditionsToExtract { get; set; }

        public string tableOutputFormat { get; set; }
    }

    /// <summary>Object bound to "cpf:engine".</summary>
    public class CpfEngine
    {
        [JsonProperty("repo:assetId")]
        public string RepoAssetId { get; set; }
    }

    /// <summary>Object bound to "cpf:status".</summary>
    public class CpfStatus
    {
        public bool completed { get; set; }

        public string type { get; set; }

        public int status { get; set; }
    }

    /// <summary>Object bound to "cpf:outputs".</summary>
    public class CpfOutputs
    {
        public List<ElementsRendition> elementsRenditions { get; set; }

        public ElementsInfo elementsInfo { get; set; }
    }

    /// <summary>Location/format pair for one entry of "elementsRenditions".</summary>
    public class ElementsRendition
    {
        [JsonProperty("cpf:location")]
        public string CpfLocation { get; set; }

        [JsonProperty("dc:format")]
        public string DcFormat { get; set; }
    }

    /// <summary>Location/format pair bound to "elementsInfo".</summary>
    public class ElementsInfo
    {
        [JsonProperty("cpf:location")]
        public string CpfLocation { get; set; }

        [JsonProperty("dc:format")]
        public string DcFormat { get; set; }
    }
}
--------------------------------------------------------------------------------------------------------------------------------
I hope this resolves your problem.