Copy link to clipboard
Copied
Hi everyone,
Just a quick question to see if I’m doing this correctly (most likely not, I suspect), and apologies for the size of this question.
I have a project where I import approximately 250,000 XML attributes into an empty InDesign document (all the data is used to build a product). The import is instant, and I can access all XML content as a massive string without any delay. However, when I try to collate each attribute into their respective objects (based on the XML structure) the script slows to a crawl (i.e. over an hour to process).
I know this is because I am iterating through every attribute individually and pushing them into their logically associated object and property. But what I don’t know is if this is the most efficient way to do this via ESTK JavaScript.
The XML is 8 element levels deep and expands in volume the deeper you go (e.g. xmlElement[0] only has 2 instances but xmlElement[7] could have 1,000s).
I’ve managed to access and sort all the data by nesting loops at each level by doing this monstrosity you see below:
/**
 * Walks an InDesign XML element collection and, recursively, all of its
 * descendants, visiting every attribute of every element exactly once.
 *
 * Each element, attribute collection and length is resolved once and kept in
 * a local variable. Re-evaluating the full
 * app.activeDocument.xmlElements[a].xmlElements[b]... chain in every loop
 * condition and body forces a fresh walk through the document's XML tree on
 * each access, which is what makes the hand-nested version so slow.
 *
 * The recursion is not limited to 8 levels: it follows the structure as deep
 * as it goes. Use `level` inside the collection step if the handling differs
 * per depth.
 *
 * @param {XMLElements} xmlElements Collection of sibling elements to visit.
 * @param {Number} level 1-based depth of the elements in `xmlElements`.
 */
function collectXmlAttributes(xmlElements, level) {
    var elementCount = xmlElements.length; // determined once per collection
    var element, attributes, attributeCount, attribute, e, a;

    for (e = 0; e < elementCount; e++) {
        element = xmlElements[e]; // resolved once, reused below

        // Attributes
        attributes = element.xmlAttributes;
        attributeCount = attributes.length;
        for (a = 0; a < attributeCount; a++) {
            attribute = attributes[a];
            // Collect data here (attribute.name / attribute.value, at depth `level`).
        }

        // Next element level
        collectXmlAttributes(element.xmlElements, level + 1);
    }
}

// Element Level 1 is the document's top-level XML element collection.
collectXmlAttributes(app.activeDocument.xmlElements, 1);
Does anyone have any insights if there is a better solution?
FYI I have tried to use web JavaScript XML to JSON solutions without success as I don't think any of those solutions were compatible with the ESTK.
Thanks
Brett
Hi,
Wow wow wow, no, don't do that
If you only need to collect the attributes' contents, I would definitely advise doing this outside of InDesign, i.e. through an XML object instantiation.
And certainly not through a loop of loop of loop of loop… You have something far better to use here: XPath.
...function main() {
var doc, f, x, attrs;
if ( !app.documents.length ) return;
f = new File ( Folder.desktop+"/attributes.xml" );
doc = app.activeDocument;
doc.exportFile ( ExportFormat.XML, f )
Copy link to clipboard
Copied
Hi,
Wow wow wow, no, don't do that
If you only need to collect the attributes' contents, I would definitely advise doing this outside of InDesign, i.e. through an XML object instantiation.
And certainly not through a loop of loop of loop of loop… You have something far better to use here: XPath.
/**
 * Exports the active document's XML structure to "attributes.xml" on the
 * desktop, parses that file into an ExtendScript XML object and selects
 * every attribute of every element with a single XPath query.
 *
 * @returns {XMLList|undefined} The matched attributes, or undefined when no
 *     document is open.
 * @throws {Error} When the exported file cannot be opened for reading.
 */
function main() {
    var doc, f, x, attrs;

    if ( !app.documents.length ) return;

    f = new File ( Folder.desktop+"/attributes.xml" );
    doc = app.activeDocument;
    doc.exportFile ( ExportFormat.XML, f );

    // File.open() reports failure through its return value; without this
    // check a failed open would silently parse an empty string.
    if ( !f.open('r') ) {
        throw new Error ( "Unable to open " + f.fsName + " for reading: " + f.error );
    }

    try {
        x = new XML ( f.read() );
    } finally {
        // Release the file handle even if the XML fails to parse.
        f.close();
    }

    attrs = x.xpath ("//*/@*"); //return a XMLList object
    return attrs;
}
main();
If you need to stay within InDesign (reach pageItems for example), you can use this :
/**
 * Collects the XML attributes of every element in the active document's
 * XML structure while staying inside the InDesign DOM, then shows them in
 * an alert.
 *
 * Elements are processed from the last XPath match to the first, so the
 * attributes appear in that order in the result.
 *
 * @returns {Array} The collected XMLAttribute objects (empty when no
 *     document is open).
 */
function main() {
    var doc, root, xes, n, attrs = [];

    if ( !app.documents.length ) return attrs;

    doc = app.activeDocument;
    root = doc.xmlElements[0];

    //"//*/@*" won't work here
    xes = root.evaluateXPathExpression ( "//*" ); //returns an array of InDesign XML Elements
    n = xes.length;

    while ( n-- ) {
        // xes[n] is the current element. Appending in place avoids
        // re-copying the whole result array on every pass, as concat() does.
        Array.prototype.push.apply ( attrs, xes[n].xmlAttributes.everyItem().getElements() );
    }

    alert( attrs );
    return attrs;
}
main();
HTH,
Loïc
Copy link to clipboard
Copied
Wow wow wow, no, don't do that
Hahaha... this was exactly what I was thinking but I was in the predicament of "just get it working" when I wrote it. ![]()
Thanks for the advice and sample script, unfortunately I do have to work within InDesign as it is the only tool I have access to. Once I have the process working with my data I'll get back to you with the results.
This is a big help even if it's only to confirm that there is a much better solution.
Brett
Copy link to clipboard
Copied
Hi,
Both scripts can be executed within InDesign. When I speak of "outside InDesign", I just mean that you access the XML attributes in an XML object rather than browsing through the inner XML structure of an InDesign document, because the latter tends to be slower. However, if you need to access the page items depending on the attributes, then the "inside" way is the one to adopt.
Loic
Copy link to clipboard
Copied
Ah thanks for clearing that up for me.
Copy link to clipboard
Copied
I wouldn't completely rule out document-XML, as I've seen cases where the ExtendScript XML would just not work due to file size.
There is plenty of room for optimization in the first script, e.g. by using temporaries - walking the XML tree for every step is indeed a waste of time. This is also true if you use XPaths; they are faster if executed on a sub-tree.
Besides, document-XML has the advantage that it can invoke an XSLT transformation during import - very speedy for sorting, conversion of attributes to text content and so forth - as far as it works (InDesign's version is showing its age). Of course that may come at the additional cost of learning XSLT.
Copy link to clipboard
Copied
Hi Dirk,
Traditionally I've had to enlist the help of my IT developers to write XSLT that transforms the XML from its default database structure into something that seamlessly imports into a template structure. While this is extremely quick once set up, it is a massive pain from a business point of view, as a single development change here takes at least 1 month to write a spec for, develop, test and lock in, and that's before I get hold of it. This process is something we can no longer continue with due to other development commitments and changes to processes, so the task fell on my shoulders, yay!
And yes I could learn XSLT but I am also in the unenviable position of being a "jack of all trades and master of none" which means my brain is confused enough as it is.![]()
On the bright side...
Thanks to Loic I have redesigned my original script and I have a 1,000% improvement in speed and the script is more than 30% smaller and it is easy to read and understand! Here is the new code structure breakdown:
/**
 * Reads every XML element of the active document and copies the values of
 * the known attributes ("Foo", "Bar", ...) into the global `myObject`.
 *
 * `myObject` is reset to a fresh record on every call. If the same attribute
 * name occurs on several elements, the last one visited wins.
 */
function VEC_collectXML() {
    var root = app.activeDocument.xmlElements[0];
    // "//*/@*" won't work here, so select the elements and read their attributes.
    var myXPath = root.evaluateXPathExpression("//*");
    var elementCount = myXPath.length; // determined once, unchanged in the loop
    var attributes, attributeCount, attribute, n, i;

    // JSON object.
    myObject = {
        Foo : "",
        Bar : ""
        // and so on...
    };

    // Collect all the data from the XML.
    for (n = 0; n < elementCount; n++) {
        // Resolve the element's attribute collection once instead of on
        // every access inside the inner loop.
        attributes = myXPath[n].xmlAttributes;
        attributeCount = attributes.length;

        for (i = 0; i < attributeCount; i++) {
            attribute = attributes[i];

            switch (attribute.name) {
                case "Foo" :
                    myObject.Foo = attribute.value;
                    break;
                case "Bar" :
                    myObject.Bar = attribute.value;
                    break;
                // and so on...
            }
        }
    }
}

// Global result holder; VEC_collectXML() replaces it with the populated record.
var myObject = [];
VEC_collectXML();
Copy link to clipboard
Copied
Yeah, in comparison to other programming environments, XSLT is a very different beast.
Here some more snippets that should speed you up even further:
If you have many unused attributes, access the few used ones directly by name, and use "isValid" to check for existence.
If you are only looking for attributes on specific elements, no need for the wildcard "//*", in my case I only expect "Root" elements.
Rather than copy-pasting expressions, use more local variables, here for the myXE.
Same goes for the for-loop upper limit, if it is unchanged in the loop.
// Looks up the "Foo" attribute by name on every matching XML element of the
// active document and writes its value (or "Nope." when the element has no
// such attribute) to the ExtendScript console.
var rootXE = app.activeDocument.xmlElements[0];
// Use "//*" here instead to select all elements rather than only "Root" ones.
var myXPath = "//Root"; // an XPath that yields only "Root" elements
var myXEs = rootXE.evaluateXPathExpression(myXPath); // Array of all matching XMLElements
var myXECnt = myXEs.length; // count of XMLElements, determine only once
for (var n = 0; n < myXECnt; n++) {
    var myXE = myXEs[n]; // current element, resolved once per iteration
    // item() returns a reference even when no such attribute exists, so
    // existence has to be checked through isValid.
    var fooXA = myXE.xmlAttributes.item("Foo");
    if (fooXA.isValid) {
        $.writeln(fooXA.value);
    } else {
        $.writeln("Nope.");
    }
}
Copy link to clipboard
Copied
Thanks for the help Dirk! ![]()
Copy link to clipboard
Copied
Hi Dirk,
Thanks for the reminder of a few facts about XML exploration. You are right, ExtendScript fails with large XML strings. I have indeed noticed that sometimes.
Loic
Copy link to clipboard
Copied
Hi Dirk,
Just some feedback on the code you posted.
I've redone my script with your solution and I had a very unexpected result. It more than doubled the process time of the re-designed script with the switch statements but the JSON population results are the same. I've used your script without modifying it and inserted the if statements where your "fooXA" if statement is (I have 42 if statements as the XML is quite complicated).
I'm not sure why this is the case, as there should be fewer iterations (1 for loop, all if statements have .isValid in the call). Very strange.
Brett
Copy link to clipboard
Copied
Hi Brett, thanks for sharing your experience. That's the problem with optimizations, you have to revisit them on a case by case basis. For that reason I also wrote "if you have many unused attributes".
In your case the switch works so fast because so many attributes are used, probably the majority of them. Rather than searching each of those 42 by name, your switch extracts the name once and the ExtendScript subsystem can do the further work locally without bothering InDesign. I still wouldn't have thought that accessing attributes by name is so slow.
One more idea: You could extract all attribute names of your current element at once (that's one call to InDesign), then iterate the resulting string array.
var attNames = myXE.xmlAttributes.everyItem().name;
var attValues = myXE.xmlAttributes.everyItem().value;
var attCnt = attNames.length;
for( var n=0; n<attCnt; n++ ) {
switch( attNames
) {
Copy link to clipboard
Copied
Thanks so much for explaining this to me; it really makes sense. Approximately 99% of the attributes in my XML are populated, so there would be next to no wasted effort with the switch. I will try to remove my second loop (as shown in your 2nd example) and see if that yields any benefit.
Copy link to clipboard
Copied
// Writes the "POINTER" attribute of every "TABLE-REF" element in the active
// document to the ExtendScript console, in the order the XPath evaluation
// returns the elements ("Nope." is written for elements without it).
var rootXE = app.activeDocument.xmlElements[0];
// Use "//*" here instead to select all elements rather than only "TABLE-REF" ones.
var myXPath = "//TABLE-REF"; // an XPath that yields only "TABLE-REF" elements
var myXEs = rootXE.evaluateXPathExpression(myXPath); // Array of all matching XMLElements
var myXECnt = myXEs.length; // count of XMLElements, determine only once
for (var n = 0; n < myXECnt; n++) {
    var myXE = myXEs[n]; // current element, resolved once per iteration
    // item() returns a reference even when no such attribute exists, so
    // existence has to be checked through isValid.
    var fooXA = myXE.xmlAttributes.item("POINTER");
    if (fooXA.isValid) {
        $.writeln(fooXA.value);
    } else {
        $.writeln("Nope.");
    }
}
The above code returns the values:
Alpers-ch002-tbl005
Alpers-ch002-tbl004
Alpers-ch002-tbl003
Alpers-ch002-tbl002
Alpers-ch002-tbl001
But I need to get the values in order, like:
Alpers-ch002-tbl001
Alpers-ch002-tbl002
Alpers-ch002-tbl003
Alpers-ch002-tbl004
Alpers-ch002-tbl005
Please advise.
Copy link to clipboard
Copied
Sreekathik,
you can either
Get ready! An upgraded Adobe Community experience is coming in January.
Learn more