• Global community
    • Language:
      • Deutsch
      • English
      • Español
      • Français
      • Português
  • 日本語コミュニティ
    Dedicated community for Japanese speakers
  • 한국 커뮤니티
    Dedicated community for Korean speakers
Exit
1

Downloading PDF Extract output.

Community Beginner ,
Aug 02, 2021 Aug 02, 2021

Copy link to clipboard

Copied

I'm having an issue with the PDF Extract API. The extraction itself works as expected, but I was not able to get the output as a ZIP file containing all the images and CSV files. What I'm getting is a multipart response, and I don't know how to parse this response to get the individual parts properly.

I would like to know if I'm doing something wrong or if I'm missing a step in the extraction process.

 

This is what I'm sending as the contentAnalyzerRequests

 

{
    "cpf:engine": {
        "repo:assetId": "urn:aaid:cpf:58af6e2c-1f0c-400d-9188-078000185695"
    },
    "cpf:inputs": {
        "documentIn": {
            "cpf:location": "InputFile0",
            "dc:format": "application/pdf"
        },
        "params": {
            "cpf:inline": {
                "elementsToExtract": ["text", "tables"],
                "renditionsToExtract": [ "tables", "figures"],
                "tableOutputFormat": "csv"
            }
        }
    },
    "cpf:outputs": {
        "elementsInfo": {
            "cpf:location": "jsonoutput",
            "dc:format": "application/json"
        },
        "elementsRenditions": {
            "cpf:location": "fileoutpart",
            "dc:format": "text/directory"
        }
    }
} 

I appreciate your help. Thanks in advance.

TOPICS
How to , PDF Extract API , PDF Services API

Views

1.5K

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Community Beginner ,
Aug 15, 2021 Aug 15, 2021

Copy link to clipboard

Copied

Hi Adobe Team,

 

I am using the above-mentioned "contentAnalyzerRequests". Now I am able to get the individual files. What changes need to be made to the contentAnalyzerRequests to receive all the files as a single ZIP file?

 

 

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Community Beginner ,
Aug 16, 2021 Aug 16, 2021

Copy link to clipboard

Copied

Hi,

Were you able to parse the multipart response and get the files? If so, could you please share how you did it?

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Community Beginner ,
Aug 17, 2021 Aug 17, 2021

Copy link to clipboard

Copied

LATEST

Hi Amila,

 

I am using a couple of NuGet packages, "Microsoft.AspNet.WebApi.Client" and "HttpMultipartParser", to achieve this. Here you go:

--------------------------------------------------------------------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Reflection;
using System.Threading.Tasks;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Net.Http.Formatting;
using HttpMultipartParser;
using Newtonsoft.Json;
 
namespace DownloadZipFile
{
    public static class Program
    {
        static string filePath;
 
        static void Main(string[] args)
        {
            filePath = @"D:\Output";
            CallWebAPIAsync().Wait();
 
        }
 
        static async Task CallWebAPIAsync()
        {
            
            using (var client = new HttpClient())
            {
                client.BaseAddress = new Uri("https://cpf-ue1.adobe.io/ops/id/");
                client.DefaultRequestHeaders.Accept.Clear();
                client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
                client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("text/plain"));
                client.DefaultRequestHeaders.Add("x-api-key""{Your CLIENT ID)}");
                client.DefaultRequestHeaders.Add("Authorization""Bearer " + "{Your Authorization token}");
                //GET Method  
                System.Net.ServicePointManager.SecurityProtocol = System.Net.SecurityProtocolType.Tls | System.Net.SecurityProtocolType.Tls11 | System.Net.SecurityProtocolType.Tls12;
 
                HttpResponseMessage response = await client.GetAsync("{Your x-request-id}");
                if (response.IsSuccessStatusCode)
                {
 
                    var parser = MultipartFormDataParser.Parse(response.Content.ReadAsStreamAsync().Result);
                    string parserParameters = parser.Parameters[0].Data;
 
                   Root model = JsonConvert.DeserializeObject<Root>(parserParameters);
                   List<ElementsRendition> listElements = model.CpfOutputs.elementsRenditions;
                   
                    Dictionary<string,string> fileNameTypeDictionary = new Dictionary<string,string>();
                    foreach(ElementsRendition a in listElements)
                    {
                        fileNameTypeDictionary.Add(a.CpfLocation, a.DcFormat);
                    }
 
                    foreach (var f in parser.Files)
                    {
                        Stream data = f.Data;
                        string fileName = Path.Combine(filePath,"{0}.{1}");
                        string fileExtension = "json";
                        if (fileNameTypeDictionary.ContainsKey(f.Name)) fileExtension = fileNameTypeDictionary[f.Name];
                        switch(fileExtension)
                        {
                            case "text/csv":
                                File.WriteAllBytes(string.Format(fileName, f.Name, "csv"), ReadFully(data));
                                break;
                            case "image/png":
                                File.WriteAllBytes(string.Format(fileName, f.Name, "png"), ReadFully(data));
                                break;
                            default:
                                File.WriteAllBytes(string.Format(fileName, f.Name, "json"), ReadFully(data));
                                break;
 
                        }
 
                      
                     
                    }
 
 
                }
                else
                {
                    Console.WriteLine("Internal server Error");
                }
            }
        }
 
 
        public static byte[] ReadFully(Stream input)
        {
            byte[] buffer = new byte[16 * 1024];
            using (MemoryStream ms = new MemoryStream())
            {
                int read;
                while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                {
                    ms.Write(buffer, 0, read);
                }
                return ms.ToArray();
            }
        }
   
 
 
 
 
    }
 
    }
 ----------------------------------------------------------------------------------------------------------------------------------
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
 
namespace DownloadZipFile
{
    public class DocumentIn
    {
        [JsonProperty("cpf:location")]
        public string CpfLocation { getset; }
 
        [JsonProperty("dc:format")]
        public string DcFormat { getset; }
    }
 
    public class CpfInline
    {
        public List<string> elementsToExtract { getset; }
        public List<string> renditionsToExtract { getset; }
        public string tableOutputFormat { getset; }
    }
 
    public class Params
    {
        [JsonProperty("cpf:inline")]
        public CpfInline CpfInline { getset; }
    }
 
    public class CpfInputs
    {
        public DocumentIn documentIn { getset; }
        public Params @params { getset; }
    }
 
    public class CpfEngine
    {
        [JsonProperty("repo:assetId")]
        public string RepoAssetId { getset; }
    }
 
    public class CpfStatus
    {
        public bool completed { getset; }
        public string type { getset; }
        public int status { getset; }
    }
 
    public class ElementsRendition
    {
        [JsonProperty("cpf:location")]
        public string CpfLocation { getset; }
 
        [JsonProperty("dc:format")]
        public string DcFormat { getset; }
    }
 
    public class ElementsInfo
    {
        [JsonProperty("cpf:location")]
        public string CpfLocation { getset; }
 
        [JsonProperty("dc:format")]
        public string DcFormat { getset; }
    }
 
    public class CpfOutputs
    {
        public List<ElementsRendition> elementsRenditions { getset; }
        public ElementsInfo elementsInfo { getset; }
    }
 
    public class Root
    {
        [JsonProperty("cpf:inputs")]
        public CpfInputs CpfInputs { getset; }
 
        [JsonProperty("cpf:engine")]
        public CpfEngine CpfEngine { getset; }
 
        [JsonProperty("cpf:status")]
        public CpfStatus CpfStatus { getset; }
 
        [JsonProperty("cpf:outputs")]
        public CpfOutputs CpfOutputs { getset; }
    }
 
}

--------------------------------------------------------------------------------------------------------------------------------

I hope this resolves your problem.

 

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Resources