Copy link to clipboard
Copied
I have a modified version of a csv parsing code from here: excel.js/csv.js at master · thetalecrafter/excel.js · GitHub Javascript code to parse CSV data - Stack Overflow
I've always used it with Illustrator scripting and it worked instantly. However, using it inside ID takes an outrageous amount of time to do the same thing!
Try the following snippet using #target illustrator and then using #target indesign.
#target illustrator
//#target indesign
function test(){
var grid = {
parse: function(csv, reviver, delimiter) {
delimiter = delimiter || ',';
reviver = reviver || function(r, c, v) { return v; };
var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
while (c < cc) {
table.push(row = []);
while (c < cc && '\r' !== chars
start = end = c;
if ('"' === chars
start = end = ++c;
while (c < cc) {
if ('"' === chars
if ('"' !== chars[c+1]) { break; }
else { chars[++c] = ''; } /* unescape "" */
}
end = ++c;
}
if ('"' === chars
while (c < cc && '\r' !== chars
} else {
while (c < cc && '\r' !== chars
}
row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));
if (delimiter === chars
}
if ('\r' === chars
if ('\n' === chars
}
return table;
},
stringify: function(table, replacer, delimiter) {
replacer = replacer || function(r, c, v) { return v; };
var csv = '', c, cc, r, rr = table.length, cell;
for (r = 0; r < rr; ++r) {
if (r) { csv += '\r\n'; }
for (c = 0, cc = table
if (c) { csv += delimiter; }
cell = replacer(r, c, table
var rx = new RegExp("["+delimiter+"\\r"+"\\n\"]");
if (rx.test(cell)) { cell = '"' + cell.replace(/"/g, '""') + '"'; }
csv += (cell || 0 === cell) ? cell : '';
}
}
return csv;
}
};
var f = File.openDialog();
if(f){
f.open('r');
var fileStr = f.read();
f.close();
var parsedFile = grid.parse(fileStr);
var len = parsedFile.length;
alert("The CSV file has " + len + " records.");
}
};
test();
Ok Here's my attempt
You can download it from http://download.creative-scripts.com/InDesign_CSV_Parser
The test file I used was somewhat complicated you can download it from http://download.creative-scripts.com/InDesign_CSV_Parser_Test_File
APP | InDesign_CSV_Parser.jsx (By Trevor) | Andy's Original Script |
---|---|---|
ILLUSTRATOR | 5047ms (5 Seconds) | 3733ms (3.7 Seconds) |
INDESIGN | 6227ms (6.3 Seconds) | 682689ms (11 minutes and 22.7 seconds) |
So as you can see the original script is about a 1/3 quicker than my version when run
...Copy link to clipboard
Copied
Hi Vassily,
Never noticed CSV processing was specifically long but I don't use that library of yours. My approach is rather:
var main = function() {
var csv = getfCSVFile(),
data;
if !csv ) return;
data = getCSVData ( csv );
if ( !data.rows || !data.rows.length ) {
alert("No data found sorry !");
return;
}
alert( data.rows.length+ " rowws found and headers are\r"+data.headers.join("\r"));
}
var u;
var getfCSVFile = function ( ) {
var
os = $.os[0],
mac = os=="M",
wF = "CSV files : *.csv;",
mF = function() {
return (f instanceof Folder) || /\.csv/i.test(f.name) );
},
f = File.openDialog("Please pick a CSV file…", mac? mF : wF );
return f;
}
var getCSVData = function ( csvFile ) {
var data = {rows:[]]}, sep = ",";
csvFile.open('r');
data.headers = csvFile.readln ().split(sep);
while ( !csvFile.eof ) {
data.rows.push (csvFile.readln ().split(sep); )
}
return data;
}
app.doScript ( "main()",u,u,UndoModes.ENTIRE_SCRIPT, "The Script" );
But it will fails differenciating carriage returns from line feeds and it needs you to know the separator. Let me know if it's "speeder" than the lib above.
Copy link to clipboard
Copied
Yea, a simple parse like that works instantly in both AI and ID, but in my case they need to have commas inside their cells.
If you try my snippet, you'll see a strange and enormous time discrepancy between executing in AI and ID.
Copy link to clipboard
Copied
The JS engine in Indesign is the oldest one of the Adobe products AFIK, and it does have quite some inefficiencies when using regular expressions in a large string.
Some performance gains can be had in your code by moving the regex compilation outside of the loops in the stringify method, and using chunks in the parse function instead of going character by character.
Moving forward, I used with great results a modified version of GitHub - cparker15/CSV-js: A CSV (comma-separated values) parser written in JavaScript. And I heard great things about GitHub - knrz/CSV.js: A simple, blazing-fast CSV parser and encoder. Full RFC 4180 compliance.
Copy link to clipboard
Copied
Interesting. I looked at the first link and saw there's a TODO regarding split lines inside of quotes, and the 2nd link has some code which has to do with higher versions of ES using the 'let' keyword.
Using the 1st method, were you able to get good results when there are line breaks inside of a cell?
Copy link to clipboard
Copied
Coming back to this a year later - I tried to change both of these code resources to work with ES3 but to no avail. For some reason, all kinds of errors appear.
Andy's regexp-heavy parser is the only one which seems to do the job for me, but it still does take a long time in Indesign. So much so that I attempted to try a BridgeTalk approach and see it it would sneak the slowness over some other application and it would have been bypassed. This failed miserably.
Well, I wonder what I'm doing wrong: the 1st code that you Vamitul testify of your own usage, had first error being ".push.bind" being unavailable, then after I pasted in an MDN javascript polyfill to add the .bind prototype, another error came immediately after where some array didn't have all its things.
The next parser had that nested comma issue - (that's why I need the parser), but I decided to try it anyway. Of course, some variable had some error, so I didn't get very far.
How are you using this CSV parser in extendscript, Vamitul ?
Copy link to clipboard
Copied
Ok Here's my attempt
You can download it from http://download.creative-scripts.com/InDesign_CSV_Parser
The test file I used was somewhat complicated you can download it from http://download.creative-scripts.com/InDesign_CSV_Parser_Test_File
APP | InDesign_CSV_Parser.jsx (By Trevor) | Andy's Original Script |
---|---|---|
ILLUSTRATOR | 5047ms (5 Seconds) | 3733ms (3.7 Seconds) |
INDESIGN | 6227ms (6.3 Seconds) | 682689ms (11 minutes and 22.7 seconds) |
So as you can see the original script is about a 1/3 quicker than my version when run on Illustrator and about 110 times slower than my version when run on InDesign.
My version uses markers if you wanted to make it more robust you could check the csv file for those markers first and replace them with others if needed.
I didn't test too much and didn't do any Googling so there's probably some better stuff out there.
HTH
Trevor
/*
InDesign_CSV_Parser.jsx A Basic CSV parser by by Trevor FOR INDESIGN
Beta 1 - 28 Feb 2018
___ __ __ __ __ __ __ __ __ __ __
| |__||(_ |(_ |_ / \|__) ||\ || \|_ (_ |/ _ |\ | / \|\ || \_/|||
| | ||__) |__) | \__/| \ || \||__/|____)|\__)| \| \__/| \||__| ...
ON OTHER APPS THERE ARE QUICKER METHODS
NOT OPTIMIZED
http://creative-scripts.com Custom whatever-you-wants
Can download script file from http://download.creative-scripts.com/InDesign_CSV_Parser
Can download test csv file from http://download.creative-scripts.com/InDesign_CSV_Parser_Test_File
These are markers that are unlikely to appear in the csv if they do then the result will be messed up 😞
\uE130 Marker for "" (These are escaped quotes between quotes)
\uE131 Marker for \n
\uE132 Marker for \n inside cell (in between quotes)
\uE133 Marker for , inside cell (in between quotes)
\uE134 Marker for , separating cells (outside quotes)
One could have the script test the csv file to see if they exist and change if so
// jshint esversion:6, devel: true
*/
function csvToTable(csv) {
if (!csv) { return; }
var table, rows, rl;
// Replace escaped quotes with marker
table = csv.replace(/""/g, '\uE130');
// Replace new Lines with marker
table = table.replace(/\n/g, '\uE131');
// Swap order of , / \n and escaped quotes when needed
table = ('\uE131' + table).replace(/(\uE131|,)\uE130"/g, '$1"\uE130').substring(1);
var quotedTextReg, find, quotedCell;
quotedTextReg = /"[^"]+"/g;
var tableBeforeFind, tableAfterFind, findIndex, lastIndex;
// Find all quoted cells
while (find = quotedTextReg.exec(table)) { // jshint ignore:line
quotedCell = find[0];
findIndex = find.index;
lastIndex = quotedTextReg.lastIndex;
tableBeforeFind = table.substring(0, findIndex);
tableAfterFind = table.substring(lastIndex);
// Replace new lines within a cell with marker
// Replace commas within a cell with marker
quotedCell = quotedCell.replace(/\uE131/g, '\uE132').replace(/,/g, '\uE133');
table = tableBeforeFind + quotedCell + tableAfterFind;
}
rows = table.split(/\uE131/);
rl = rows.length;
while (rl--) {
// Remove outer quotes and replace markers back to desired values
// It's quicker to replace for each row one at a time than for the whole table at least with very large tables
// Split to cells
rows[rl] = rows[rl]
.replace(/"/g, '')
.replace(/\uE130/g, '"')
.replace(/,/g, '\uE134')
.replace(/\uE133/g, ',')
.replace(/\uE132/g, '\n')
.split(/\uE134/);
}
return rows;
}
var csv, csvFile, t;
csv = [ // Fallback csvFile in case there's none by the test
'"""1","2,2","""3""","4,""4","""",6,",",8',
'"9',
'9","10',
'',
'10","',
'11","""""12""""",,,,',
'"13"""",13""",14,,,,,,',
'"""15""",,,,,,,',
',,,,,,,',
'"1,""2,2"",""""""3"""""",""4,""""4"","""""""",6,"","",8",,,,,,,'
].join('\n');
var csvFile = File.openDialog();
if (csvFile) {
csvFile.encoding = 'UTF8';
csvFile.open('r');
var csv = csvFile.read();
csvFile.close();
}
t = new Date();
var table = csvToTable(csv);
t = new Date() - t;
$.writeln('Took ' + t + 'ms to process ' + (table.length) + ' rows');
Copy link to clipboard
Copied
Much appreciated, I am sure to take advantage of this one during this year. I think for basic CSV parsing with quotes, this will be very useful in Indesign. As far as better stuff out there, once I test this and before that better stuff is posted here, this answer shall be marked as correct.