Copy link to clipboard
Copied
Hello. Need to organize 2500 pages. Each page has searchable text containing a key ID on each page. I need to organize the pages by this ID. How can this be accomplished? Thank you.
Copy link to clipboard
Copied
May be possible with a script.
Copy link to clipboard
Copied
Thank you for the response. Can you point me in the direction of a pre-made script?
Copy link to clipboard
Copied
Yes, that can be done with a script.
The key ID is always at the same place on each page?
@+
Copy link to clipboard
Copied
Thank you for the response. It is in the same general area on each page. Sometimes I have to add the number because the scanned doc is not legible. Can you point me in the direction of a pre-made script?
Copy link to clipboard
Copied
There's no pre-made script for this. It will have to be custom-made to fit the specific requirements in your case. The text in question can be identified based on various things: Its location on the page, its contents, the text around it, or a combination of some/all of the above.
I've developed many similar tools in the past, so if you're interested in hiring a professional to create it for you, feel free to contact me privately by clicking my user-name and then on "Send a Message".
Copy link to clipboard
Copied
Thank you again. I will keep your information on hand while I further tackle this.
Copy link to clipboard
Copied
I can help you if you give me more information about how to find the key ID!
If this is a position, please give it to me.
Or maybe the key ID is always after a word or a series of words ("Reference ID:" for example).
Could you also let me know the pattern of the key ID (x digits or words, or mixed 123AB45CDEF, etc.)
The best would be to share a file with a few pages containing fictitious information.
@+
Copy link to clipboard
Copied
Thank you for your help. I do not know what you mean by "Location" or how to "find the key ID". In the attached sample, I want to organize the pages by the "device no: 277", specifically the unique number. Sometimes it is in the same place on all forms. Sometimes it is added with editing because the scan is not recognizing the text.
Copy link to clipboard
Copied
Hi,
Here is the script I did for you:
// Acrobat JavaScript: run on the open PDF (top-level `this` is expected to be the Doc).
// Creates a new PDF whose pages are ordered by the numeric ID that follows
// "DEVICE NO" (or a lone "ID") on each page. Pages on which no numeric ID is
// found are appended after the sorted ones. The result is saved next to the
// source file as "<name> (Sorted).pdf" and a summary is shown.
// `var`/function expressions are kept on purpose: the original script uses no
// newer syntax, and older Acrobat engines may not accept let/const/arrows.
(function (doc) {
    // Stripped word at `index` on `page`, or "" when the index is past the last word.
    function wordAt(page, index, count) {
        if (index < 0 || index >= count) return "";
        var word = doc.getPageNthWord(page, index, true);
        return (word === undefined || word === null) ? "" : String(word);
    }

    // Numeric value of `text`, or NaN when it is blank or not a number
    // (Number("") would otherwise yield 0 and be mistaken for a real ID).
    function toId(text) {
        if (/^\s*$/.test(text)) return NaN;
        return Number(text);
    }

    // First numeric ID found on `page`, or NaN when the page has none.
    function findPageId(page) {
        var count = doc.getPageNumWords(page); // read once, not on every iteration
        for (var i = 0; i < count; i++) {
            var current = wordAt(page, i, count).toUpperCase();
            var id = NaN;
            if (current === "DEVICE" && wordAt(page, i + 1, count).toUpperCase() === "NO") {
                id = toId(wordAt(page, i + 2, count));
            } else if (current.indexOf("DE") === 0 && wordAt(page, i + 2, count).toUpperCase() === "NO") {
                // "DEVICE" split into two words (e.g. by OCR), followed by "NO".
                id = toId(wordAt(page, i + 3, count));
            } else if (current === "ID") {
                id = toId(wordAt(page, i + 1, count));
            }
            // A keyword followed by a non-numeric word is not a match: keep scanning.
            if (!isNaN(id)) return id;
        }
        return NaN;
    }

    // Human-readable duration such as "1 minute 12.3 seconds".
    function formatDuration(elapsedMs) {
        var totalSeconds = elapsedMs / 1000;
        var minutes = Math.floor(totalSeconds / 60);
        var seconds = Math.floor((totalSeconds - minutes * 60) * 10) / 10;
        var parts = [];
        if (minutes > 0) parts.push(minutes === 1 ? "1 minute" : minutes + " minutes");
        if (seconds > 0) parts.push(seconds + (seconds < 2 ? " second" : " seconds"));
        return parts.length ? parts.join(" ") : "0 seconds";
    }

    var startDate = new Date();
    var starting = util.printd("mm/dd/yy – HH:MM:ss", startDate);

    var theIDs = []; // [id, pageIndex] pairs
    var noIDs = [];  // indexes of pages without a numeric ID
    for (var p = 0; p < doc.numPages; p++) {
        var pageId = findPageId(p);
        if (isNaN(pageId)) noIDs.push(p);
        else theIDs.push([pageId, p]);
    }

    // As in the original script, nothing is produced when no page carries an ID.
    if (!theIDs.length) return;

    // Tie-break on the page index so pages sharing an ID keep their original order
    // even if the engine's sort is not stable.
    theIDs.sort(function (a, b) { return (a[0] - b[0]) || (a[1] - b[1]); });

    // Final page order: sorted pages first, then the pages without ID.
    var pageOrder = [];
    for (var s = 0; s < theIDs.length; s++) pageOrder.push(theIDs[s][1]);
    pageOrder = pageOrder.concat(noIDs);

    var newDoc = app.newDoc();
    for (var k = 0; k < pageOrder.length; k++) {
        newDoc.insertPages({
            nPage: newDoc.numPages - 1, // append after the current last page
            cPath: doc.path,
            nStart: pageOrder[k]
        });
    }
    newDoc.deletePages(0); // drop the page app.newDoc() started with

    // Escape the dot so only a real ".pdf" extension is replaced.
    newDoc.saveAs(doc.path.replace(/\.pdf$/i, " (Sorted).pdf"));

    var endDate = new Date();
    var ending = util.printd("mm/dd/yy – HH:MM:ss", endDate);
    var theTime = formatDuration(endDate.valueOf() - startDate.valueOf());

    var txt = "Process starting: " + starting + "\rProcess ending: " + ending + "\rProcess duration: " + theTime + "\r\r" + theIDs.length + " reference" + (theIDs.length > 1 ? "s" : "") + " sorted.";
    if (noIDs.length) {
        txt += "\r" + noIDs.length + " page" + (noIDs.length > 1 ? "s" : "") + " without any ID found.";
    }
    console.clear();
    console.println(txt);
    app.alert(txt, 3);
})(this);
With this script the attached file has been generated in about 1 second, but that will take more time for your 2500-page file and maybe you would prefer this other script which will show you the progress of the process.
// Acrobat JavaScript: run on the open PDF (top-level `this` is expected to be the Doc).
// Same job as a plain "sort pages by ID" script, but reports progress in the
// JavaScript console while scanning pages and while building the new file.
// Creates a new PDF whose pages are ordered by the numeric ID that follows
// "DEVICE NO" (or a lone "ID") on each page; pages without a numeric ID are
// appended at the end. The result is saved as "<name> (Sorted).pdf".
// `var`/function expressions are kept on purpose: the original script uses no
// newer syntax, and older Acrobat engines may not accept let/const/arrows.
(function (doc) {
    // Stripped word at `index` on `page`, or "" when the index is past the last word.
    function wordAt(page, index, count) {
        if (index < 0 || index >= count) return "";
        var word = doc.getPageNthWord(page, index, true);
        return (word === undefined || word === null) ? "" : String(word);
    }

    // Numeric value of `text`, or NaN when it is blank or not a number
    // (Number("") would otherwise yield 0 and be mistaken for a real ID).
    function toId(text) {
        if (/^\s*$/.test(text)) return NaN;
        return Number(text);
    }

    // First numeric ID found on `page`, or NaN when the page has none.
    function findPageId(page) {
        var count = doc.getPageNumWords(page); // read once, not on every iteration
        for (var i = 0; i < count; i++) {
            var current = wordAt(page, i, count).toUpperCase();
            var id = NaN;
            if (current === "DEVICE" && wordAt(page, i + 1, count).toUpperCase() === "NO") {
                id = toId(wordAt(page, i + 2, count));
            } else if (current.indexOf("DE") === 0 && wordAt(page, i + 2, count).toUpperCase() === "NO") {
                // "DEVICE" split into two words (e.g. by OCR), followed by "NO".
                id = toId(wordAt(page, i + 3, count));
            } else if (current === "ID") {
                id = toId(wordAt(page, i + 1, count));
            }
            // A keyword followed by a non-numeric word is not a match: keep scanning.
            if (!isNaN(id)) return id;
        }
        return NaN;
    }

    // Human-readable duration such as "1 minute 12.3 seconds".
    function formatDuration(elapsedMs) {
        var totalSeconds = elapsedMs / 1000;
        var minutes = Math.floor(totalSeconds / 60);
        var seconds = Math.floor((totalSeconds - minutes * 60) * 10) / 10;
        var parts = [];
        if (minutes > 0) parts.push(minutes === 1 ? "1 minute" : minutes + " minutes");
        if (seconds > 0) parts.push(seconds + (seconds < 2 ? " second" : " seconds"));
        return parts.length ? parts.join(" ") : "0 seconds";
    }

    // Replaces the console content with the start time plus one progress line.
    function showProgress(startLabel, message) {
        console.clear();
        console.println("Process starting: " + startLabel);
        console.println(message);
    }

    var startDate = new Date();
    var starting = util.printd("mm/dd/yy – HH:MM:ss", startDate);

    var theIDs = []; // [id, pageIndex] pairs
    var noIDs = [];  // indexes of pages without a numeric ID
    for (var p = 0; p < doc.numPages; p++) {
        showProgress(starting, "Processing the page #" + (p + 1) + "/" + doc.numPages);
        var pageId = findPageId(p);
        if (isNaN(pageId)) noIDs.push(p);
        else theIDs.push([pageId, p]);
    }

    // As in the original script, nothing is produced when no page carries an ID.
    if (!theIDs.length) return;

    // Tie-break on the page index so pages sharing an ID keep their original order
    // even if the engine's sort is not stable.
    theIDs.sort(function (a, b) { return (a[0] - b[0]) || (a[1] - b[1]); });

    // Final page order: sorted pages first, then the pages without ID.
    var pageOrder = [];
    for (var s = 0; s < theIDs.length; s++) pageOrder.push(theIDs[s][1]);
    pageOrder = pageOrder.concat(noIDs);

    var newDoc = app.newDoc();
    for (var k = 0; k < pageOrder.length; k++) {
        showProgress(starting, "Adding page #" + (pageOrder[k] + 1) + " to the new file.");
        newDoc.insertPages({
            nPage: newDoc.numPages - 1, // append after the current last page
            cPath: doc.path,
            nStart: pageOrder[k]
        });
    }
    newDoc.deletePages(0); // drop the page app.newDoc() started with

    // Escape the dot so only a real ".pdf" extension is replaced.
    newDoc.saveAs(doc.path.replace(/\.pdf$/i, " (Sorted).pdf"));

    var endDate = new Date();
    var ending = util.printd("mm/dd/yy – HH:MM:ss", endDate);
    var theTime = formatDuration(endDate.valueOf() - startDate.valueOf());

    var txt = "Process starting: " + starting + "\rProcess ending: " + ending + "\rProcess duration: " + theTime + "\r\r" + theIDs.length + " reference" + (theIDs.length > 1 ? "s" : "") + " sorted.";
    if (noIDs.length) {
        txt += "\r" + noIDs.length + " page" + (noIDs.length > 1 ? "s" : "") + " without any ID found.";
    }
    console.clear();
    console.println(txt);
    app.alert(txt, 3);
})(this);
Let me know if that suits you and how long it took for your entire file.
@+
Copy link to clipboard
Copied
Wow. This is great java wizardry. I will apply this and let you know. Thank you for sharing your skill set 😃