Skip to main content
Participating Frequently
June 5, 2023
Question

Page Organization by text (unique ID number on page). Script needed?

  • June 5, 2023
  • 2 replies
  • 1953 views

Hello.  Need to organize 2500 pages.  Each page has searchable text containing a key ID on each page.  I need to organize the pages by this ID.  How can this be accomplished?  Thank you.

This topic has been closed for replies.

2 replies

bebarth
Community Expert
June 5, 2023

Yes, that can be done with a script.

The key ID is always at the same place on each page?

@+

OCD BObAuthor
Participating Frequently
June 5, 2023

Thank you for the response.  It is in the same general area on each page.  Sometimes I have to add the number because the scanned doc is not legible.  Can you point me in the direction of a pre-made script?  

OCD BObAuthor
Participating Frequently
June 6, 2023

Hi,

Here is the script I did for you:

/*
 * Acrobat JavaScript: build a copy of the current PDF whose pages are ordered
 * by the numeric ID found in each page's text.
 *
 * A page's ID is the number that follows one of these word patterns
 * (words are read with punctuation stripped):
 *   DEVICE NO <id>      - "DEVICE" in any letter case, then exactly "NO"
 *   DE... <any> NO <id> - a word starting with "DE", one word, then "NO"
 *   ID <id>
 * Pages without a usable numeric ID are appended, in their original order,
 * after the sorted pages. The result is saved next to the source file as
 * "<name> (Sorted).pdf" and a summary is shown in the console and an alert.
 *
 * Run it with `this` bound to the open document.
 * NOTE(review): app.newDoc / insertPages / saveAs are normally only allowed in
 * a privileged context (console, Action/batch) - confirm for your setup.
 * Only `var` and classic functions are used on purpose.
 * NOTE(review): Acrobat's engine is assumed to lack ES2015 syntax - confirm.
 */

// Converts one extracted word to a finite number; null when it is missing or not numeric.
function parseIdWord(word) {
	if (word === undefined || word === null) return null;
	var text = String(word).replace(/^\s+|\s+$/g, "");
	// Number("") is 0, which would otherwise be taken for a real ID.
	if (text === "") return null;
	var value = Number(text);
	return isFinite(value) ? value : null;
}

// Returns the numeric ID found on page `pageIndex` of `doc`, or null when there is none.
function findPageId(doc, pageIndex) {
	// Ask for the word count once instead of on every loop iteration.
	var wordCount = doc.getPageNumWords(pageIndex);
	// Look-ahead past the last word yields "" so a marker at the end of a page cannot match.
	function wordAt(index) {
		return index < wordCount ? doc.getPageNthWord(pageIndex, index, true) : "";
	}
	for (var i = 0; i < wordCount; i++) {
		var marker = String(wordAt(i)).toUpperCase();
		var id = null;
		if (marker === "DEVICE" && wordAt(i + 1) === "NO") {
			id = parseIdWord(wordAt(i + 2));
		} else if (marker.indexOf("DE") === 0 && wordAt(i + 2) === "NO") {
			id = parseIdWord(wordAt(i + 3));
		} else if (marker === "ID") {
			id = parseIdWord(wordAt(i + 1));
		}
		// A marker followed by a non-numeric word is ignored and scanning continues,
		// so NaN never reaches the sort comparator.
		if (id !== null) return id;
	}
	return null;
}

// Scans every page of `doc`; returns { ids: [[id, pageIndex], ...], missing: [pageIndex, ...] }.
function collectPageIds(doc) {
	var result = { ids: [], missing: [] };
	for (var p = 0; p < doc.numPages; p++) {
		var id = findPageId(doc, p);
		if (id === null) result.missing.push(p);
		else result.ids.push([id, p]);
	}
	return result;
}

// Formats a duration in milliseconds as e.g. "2 minutes 5.3 seconds" (tenths of a second, truncated).
function formatDuration(elapsedMs) {
	// Integer arithmetic on milliseconds avoids the rounding noise of fractional minutes.
	var minutes = Math.floor(elapsedMs / 60000);
	var seconds = Math.floor((elapsedMs % 60000) / 100) / 10;
	var parts = [];
	if (minutes > 0) parts.push(minutes + (minutes === 1 ? " minute" : " minutes"));
	// Always report something, even for a run shorter than a tenth of a second.
	if (seconds > 0 || parts.length === 0) parts.push(seconds + (seconds === 1 ? " second" : " seconds"));
	return parts.join(" ");
}

// Derives the output path: "<name>.pdf" -> "<name> (Sorted).pdf".
function buildSortedPath(path) {
	var extension = /\.pdf$/i;
	// Without a ".pdf" extension the suffix is appended so the source path is never reused.
	return extension.test(path) ? path.replace(extension, " (Sorted).pdf") : path + " (Sorted).pdf";
}

// Entry point: sorts the pages of `doc` into a new document, saves it and reports the result.
function sortPagesById(doc) {
	var timeFormat = "mm/dd/yy – HH:MM:ss";
	var startDate = new Date();
	var starting = util.printd(timeFormat, startDate);

	var scan = collectPageIds(doc);
	if (scan.ids.length === 0) {
		// Nothing to sort: say so instead of ending silently.
		var warning = "No page ID found in " + doc.numPages + " page(s): nothing to sort.";
		console.println(warning);
		app.alert(warning, 1);
		return;
	}

	// Equal IDs keep their original page order regardless of the engine's sort stability.
	scan.ids.sort(function (a, b) { return (a[0] - b[0]) || (a[1] - b[1]); });

	var order = [];
	for (var i = 0; i < scan.ids.length; i++) order.push(scan.ids[i][1]);
	order = order.concat(scan.missing);

	var newDoc = app.newDoc();
	for (var k = 0; k < order.length; k++) {
		// Always append after the current last page of the new document.
		newDoc.insertPages({
			nPage: newDoc.numPages - 1,
			cPath: doc.path,
			nStart: order[k]
		});
	}
	// Drop the blank page that app.newDoc() created.
	newDoc.deletePages(0);
	newDoc.saveAs(buildSortedPath(doc.path));

	var endDate = new Date();
	var ending = util.printd(timeFormat, endDate);
	var theTime = formatDuration(endDate.valueOf() - startDate.valueOf());
	var txt = "Process starting: " + starting + "\rProcess ending: " + ending + "\rProcess duration: " + theTime + "\r\r" + scan.ids.length + " reference" + (scan.ids.length > 1 ? "s" : "") + " sorted.";
	if (scan.missing.length) {
		txt += "\r" + scan.missing.length + " page" + (scan.missing.length > 1 ? "s" : "") + " without any ID found.";
	}
	console.clear();
	console.println(txt);
	app.alert(txt, 3);
}

// Run only where the Acrobat host objects exist; `this` is the current document there.
if (typeof app !== "undefined" && typeof util !== "undefined") sortPagesById(this);

With this script the attached file has been generated in about 1 second, but that will take more time for your 2500-page file and maybe you would prefer this other script which will show you the progress of the process.

/*
 * Acrobat JavaScript: same page sort as a plain "sort by ID" script, but the
 * console shows which page is being processed while it runs.
 *
 * A page's ID is the number that follows one of these word patterns
 * (words are read with punctuation stripped):
 *   DEVICE NO <id>      - "DEVICE" in any letter case, then exactly "NO"
 *   DE... <any> NO <id> - a word starting with "DE", one word, then "NO"
 *   ID <id>
 * Pages without a usable numeric ID are appended, in their original order,
 * after the sorted pages. The result is saved next to the source file as
 * "<name> (Sorted).pdf" and a summary is shown in the console and an alert.
 *
 * Run it with `this` bound to the open document.
 * NOTE(review): app.newDoc / insertPages / saveAs are normally only allowed in
 * a privileged context (console, Action/batch) - confirm for your setup.
 * Only `var` and classic functions are used on purpose.
 * NOTE(review): Acrobat's engine is assumed to lack ES2015 syntax - confirm.
 */

// Converts one extracted word to a finite number; null when it is missing or not numeric.
function parseIdWord(word) {
	if (word === undefined || word === null) return null;
	var text = String(word).replace(/^\s+|\s+$/g, "");
	// Number("") is 0, which would otherwise be taken for a real ID.
	if (text === "") return null;
	var value = Number(text);
	return isFinite(value) ? value : null;
}

// Returns the numeric ID found on page `pageIndex` of `doc`, or null when there is none.
function findPageId(doc, pageIndex) {
	// Ask for the word count once instead of on every loop iteration.
	var wordCount = doc.getPageNumWords(pageIndex);
	// Look-ahead past the last word yields "" so a marker at the end of a page cannot match.
	function wordAt(index) {
		return index < wordCount ? doc.getPageNthWord(pageIndex, index, true) : "";
	}
	for (var i = 0; i < wordCount; i++) {
		var marker = String(wordAt(i)).toUpperCase();
		var id = null;
		if (marker === "DEVICE" && wordAt(i + 1) === "NO") {
			id = parseIdWord(wordAt(i + 2));
		} else if (marker.indexOf("DE") === 0 && wordAt(i + 2) === "NO") {
			id = parseIdWord(wordAt(i + 3));
		} else if (marker === "ID") {
			id = parseIdWord(wordAt(i + 1));
		}
		// A marker followed by a non-numeric word is ignored and scanning continues,
		// so NaN never reaches the sort comparator.
		if (id !== null) return id;
	}
	return null;
}

// Formats a duration in milliseconds as e.g. "2 minutes 5.3 seconds" (tenths of a second, truncated).
function formatDuration(elapsedMs) {
	// Integer arithmetic on milliseconds avoids the rounding noise of fractional minutes.
	var minutes = Math.floor(elapsedMs / 60000);
	var seconds = Math.floor((elapsedMs % 60000) / 100) / 10;
	var parts = [];
	if (minutes > 0) parts.push(minutes + (minutes === 1 ? " minute" : " minutes"));
	// Always report something, even for a run shorter than a tenth of a second.
	if (seconds > 0 || parts.length === 0) parts.push(seconds + (seconds === 1 ? " second" : " seconds"));
	return parts.join(" ");
}

// Derives the output path: "<name>.pdf" -> "<name> (Sorted).pdf".
function buildSortedPath(path) {
	var extension = /\.pdf$/i;
	// Without a ".pdf" extension the suffix is appended so the source path is never reused.
	return extension.test(path) ? path.replace(extension, " (Sorted).pdf") : path + " (Sorted).pdf";
}

// Replaces the console content with the start time and one progress line.
function showProgress(starting, message) {
	console.clear();
	console.println("Process starting: " + starting);
	console.println(message);
}

// Entry point: sorts the pages of `doc` into a new document, reporting progress in the console.
function sortPagesByIdWithProgress(doc) {
	var timeFormat = "mm/dd/yy – HH:MM:ss";
	var startDate = new Date();
	var starting = util.printd(timeFormat, startDate);

	// Pass 1: read the ID of every page.
	var ids = [];      // [id, pageIndex] pairs
	var missing = [];  // page indexes without a usable ID
	for (var p = 0; p < doc.numPages; p++) {
		showProgress(starting, "Processing the page #" + (p + 1) + "/" + doc.numPages);
		var id = findPageId(doc, p);
		if (id === null) missing.push(p);
		else ids.push([id, p]);
	}
	if (ids.length === 0) {
		// Nothing to sort: say so instead of ending silently.
		var warning = "No page ID found in " + doc.numPages + " page(s): nothing to sort.";
		console.println(warning);
		app.alert(warning, 1);
		return;
	}

	// Equal IDs keep their original page order regardless of the engine's sort stability.
	ids.sort(function (a, b) { return (a[0] - b[0]) || (a[1] - b[1]); });

	var order = [];
	for (var i = 0; i < ids.length; i++) order.push(ids[i][1]);
	order = order.concat(missing);

	// Pass 2: copy the pages into a new document in their final order.
	var newDoc = app.newDoc();
	for (var k = 0; k < order.length; k++) {
		showProgress(starting, "Adding page #" + (order[k] + 1) + " to the new file.");
		// Always append after the current last page of the new document.
		newDoc.insertPages({
			nPage: newDoc.numPages - 1,
			cPath: doc.path,
			nStart: order[k]
		});
	}
	// Drop the blank page that app.newDoc() created.
	newDoc.deletePages(0);
	newDoc.saveAs(buildSortedPath(doc.path));

	var endDate = new Date();
	var ending = util.printd(timeFormat, endDate);
	var theTime = formatDuration(endDate.valueOf() - startDate.valueOf());
	var txt = "Process starting: " + starting + "\rProcess ending: " + ending + "\rProcess duration: " + theTime + "\r\r" + ids.length + " reference" + (ids.length > 1 ? "s" : "") + " sorted.";
	if (missing.length) {
		txt += "\r" + missing.length + " page" + (missing.length > 1 ? "s" : "") + " without any ID found.";
	}
	console.clear();
	console.println(txt);
	app.alert(txt, 3);
}

// Run only where the Acrobat host objects exist; `this` is the current document there.
if (typeof app !== "undefined" && typeof util !== "undefined") sortPagesByIdWithProgress(this);

Let me know if that suits you and how long it took for your entire file.

@+


Wow.  This is great java wizardry.  I will apply this and let you know.  Thank you for sharing your skill set 😃

Bernd Alheit
Community Expert
June 5, 2023

May be possible with a script.

OCD BObAuthor
Participating Frequently
June 5, 2023

Thank you for the response.  Can you point me in the direction of a pre-made script?