Skip to main content
Participant
June 10, 2023
Answered

Help needed to create a javascript to locate text with font size 9 and extract to csv with page refs

  • June 10, 2023
  • 2 replies
  • 1311 views

Hi all, I need help to create a JavaScript that finds all instances of the text "A-URN" with 9 numbers afterwards, e.g. "A-URN000374863", but the text needs to be in font size 9. I then need all instances to be extracted into a csv with the page references listed too. Any help would be greatly appreciated!!! Thanks Lewis

This topic has been closed for replies.
Correct answer bebarth

Hi,

Below is a script you can run from the console window or an action wizard.

As the script can't directly check the font size, a dialog box will ask you to confirm firstly the correct size of a reference before checking all other ones.

Once the process has completed you will get an attachment in a copy of your original file.

The reference surrounded in red is the one used for the font size.

Attached is the result of the example file I used to generate the script, and here is the script:

 

/*
 * Acrobat JavaScript, meant to be run from the console window or an Action Wizard step.
 *
 * Scans every page for references made of the word "A-" followed by "URN" + 9 digits.
 * The script cannot read a font size directly, so the user is asked to confirm one
 * occurrence as having the correct size; the height of that word is then used as the
 * yardstick for all following occurrences. Matching references are written, with their
 * 1-based page number, into a semicolon-separated .csv file attached to a saved copy
 * of the document.
 */
var doc=this; // at the top level of a console/action script "this" is the active document

// Word heights are floating-point values, so they are compared with a small
// tolerance (in points) instead of strict equality.
var HEIGHT_TOLERANCE=0.01;

// Returns the quads of word #wordIndex on page #pageIndex as one flat array of numbers,
// after mapping them through the inverse of the page's Matrix2D.fromRotated matrix.
// Indexes 0-7 are the first quad of the word (presumably 4 corner points as x,y pairs).
function getWordCoords(doc,pageIndex,wordIndex) {
	var quads=doc.getPageNthWordQuads(pageIndex,wordIndex);
	var inverse=(new Matrix2D).fromRotated(doc,pageIndex).invert();
	var parts=inverse.transform(quads).toString().split(",");
	var coords=[];
	for (var k=0; k<parts.length; k++) coords.push(Number(parts[k]));
	return coords;
}

var d0=new Date();
var starting=util.printd("mm/dd/yy – HH:MM:ss",d0);
var sizeOK=0;   // becomes non-zero once the user has confirmed a reference
var cancel=0;   // becomes non-zero when the user cancels the confirmation dialog
var found=0;    // references whose height matches the confirmed one
var nbRef=0;    // all references matching the text pattern, whatever their size
var height=0;   // height of the confirmed reference
var references="REF;PAGE";
for (var p=0; p<doc.numPages; p++) {
	// Progress is reported once per page; refreshing the console for every word
	// makes the run dramatically slower on large files.
	console.clear();
	console.println("Process starting: "+starting);
	console.println("Processing the page #"+(p+1)+"/"+doc.numPages);
	if (found) console.println(found+"/"+nbRef+" References found at the correct font size");
	var nbWords=doc.getPageNumWords(p);
	// A reference spans two words, so the last word of a page cannot start one.
	for (var i=0; i+1<nbWords; i++) {
		if (doc.getPageNthWord(p,i,false)!="A-") continue;
		var nextWord=doc.getPageNthWord(p,i+1,true);
		if (!/^URN\d{9}$/.test(nextWord)) continue;
		nbRef++;
		var theReference="A-"+nextWord;
		var coords=getWordCoords(doc,p,i);
		var wordHeight=coords[1]-coords[7];
		if (!sizeOK) {
			doc.pageNum=p;
			// Draw the red box around both words BEFORE asking, so the user can see
			// which occurrence the question is about.
			var nextCoords=getWordCoords(doc,p,i+1);
			var f=doc.addField("theBox","text",p,[coords[0]-2,coords[1]+2,nextCoords[6]+2,nextCoords[7]-2]);
			f.textSize=10;
			f.textFont=font.HelvB;
			f.fillColor=color.transparent;
			f.strokeColor=color.red;
			f.userName="Reference for the correct font size";
			f.setFocus();
			// nType 3 is a Yes/No/Cancel dialog: 2 = Cancel, 3 = No, 4 = Yes.
			var question=app.alert({
				cMsg: "Is this reference \""+theReference+"\" on page "+(p+1)+" in the correct font size?",
				cTitle: "Reference in 9-pt font size",
				nIcon: 2,
				nType: 3
			});
			if (question!=4) {
				// Not confirmed: the box only stays on the confirmed reference.
				doc.removeField("theBox");
				if (question==2) {
					cancel++;
					break;
				}
				continue;
			}
			height=wordHeight;
			sizeOK++;
		}
		if (Math.abs(wordHeight-height)<=HEIGHT_TOLERANCE) {
			references+="\r"+theReference+";"+(p+1);
			found++;
		}
	}
	if (cancel) break;
}
var df=new Date();
var ending=util.printd("mm/dd/yy – HH:MM:ss",df);
var temps=(df.valueOf()-d0.valueOf())/1000/60; // elapsed time in minutes
var theMinutes=Math.floor(temps);
var theSeconds=Math.floor((temps-theMinutes)*60*10)/10; // truncated to one decimal
var theTime="";
if (theMinutes>0) theTime=(theMinutes==1)?"1 minute":theMinutes+" minutes";
if (theSeconds>0) theTime+=" "+theSeconds+((theSeconds<2)?" second":" seconds");
theTime=theTime.replace(/^\s+|\s+$/gm,"");
var txt="Process starting: "+starting+"\rProcess ending: "+ending+"\rProcess duration: "+theTime+"\r\r";
if (found) {
	doc.createDataObject("References & Pages.csv", "");
	var oFile=util.streamFromString(references);
	doc.setDataObjectContents("References & Pages.csv", oFile);
	// The dot is escaped so that only a real ".pdf" extension is replaced.
	doc.saveAs(doc.path.replace(/\.pdf$/i," (With Attached .csv File).pdf"));
	doc.viewState={overViewMode:7};
	var plurial=found>1?"s":"";
	txt+=found+"/"+nbRef+" Reference"+plurial+" found at the correct font size.";
} else txt+="No reference found.";
console.clear();
console.println(txt);
app.alert(txt,3);

 

@+

2 replies

bebarth
Community Expert
bebarthCommunity ExpertCorrect answer
Community Expert
June 12, 2023

Hi,

Below is a script you can run from the console window or an action wizard.

As the script can't directly check the font size, a dialog box will ask you to confirm firstly the correct size of a reference before checking all other ones.

Once the process has completed you will get an attachment in a copy of your original file.

The reference surrounded in red is the one used for the font size.

Attached is the result of the example file I used to generate the script, and here is the script:

 

/*
 * Acrobat JavaScript, meant to be run from the console window or an Action Wizard step.
 *
 * Scans every page for references made of the word "A-" followed by "URN" + 9 digits.
 * The script cannot read a font size directly, so the user is asked to confirm one
 * occurrence as having the correct size; the height of that word is then used as the
 * yardstick for all following occurrences. Matching references are written, with their
 * 1-based page number, into a semicolon-separated .csv file attached to a saved copy
 * of the document.
 */
var doc=this; // at the top level of a console/action script "this" is the active document

// Word heights are floating-point values, so they are compared with a small
// tolerance (in points) instead of strict equality.
var HEIGHT_TOLERANCE=0.01;

// Returns the quads of word #wordIndex on page #pageIndex as one flat array of numbers,
// after mapping them through the inverse of the page's Matrix2D.fromRotated matrix.
// Indexes 0-7 are the first quad of the word (presumably 4 corner points as x,y pairs).
function getWordCoords(doc,pageIndex,wordIndex) {
	var quads=doc.getPageNthWordQuads(pageIndex,wordIndex);
	var inverse=(new Matrix2D).fromRotated(doc,pageIndex).invert();
	var parts=inverse.transform(quads).toString().split(",");
	var coords=[];
	for (var k=0; k<parts.length; k++) coords.push(Number(parts[k]));
	return coords;
}

var d0=new Date();
var starting=util.printd("mm/dd/yy – HH:MM:ss",d0);
var sizeOK=0;   // becomes non-zero once the user has confirmed a reference
var cancel=0;   // becomes non-zero when the user cancels the confirmation dialog
var found=0;    // references whose height matches the confirmed one
var nbRef=0;    // all references matching the text pattern, whatever their size
var height=0;   // height of the confirmed reference
var references="REF;PAGE";
for (var p=0; p<doc.numPages; p++) {
	// Progress is reported once per page; refreshing the console for every word
	// makes the run dramatically slower on large files.
	console.clear();
	console.println("Process starting: "+starting);
	console.println("Processing the page #"+(p+1)+"/"+doc.numPages);
	if (found) console.println(found+"/"+nbRef+" References found at the correct font size");
	var nbWords=doc.getPageNumWords(p);
	// A reference spans two words, so the last word of a page cannot start one.
	for (var i=0; i+1<nbWords; i++) {
		if (doc.getPageNthWord(p,i,false)!="A-") continue;
		var nextWord=doc.getPageNthWord(p,i+1,true);
		if (!/^URN\d{9}$/.test(nextWord)) continue;
		nbRef++;
		var theReference="A-"+nextWord;
		var coords=getWordCoords(doc,p,i);
		var wordHeight=coords[1]-coords[7];
		if (!sizeOK) {
			doc.pageNum=p;
			// Draw the red box around both words BEFORE asking, so the user can see
			// which occurrence the question is about.
			var nextCoords=getWordCoords(doc,p,i+1);
			var f=doc.addField("theBox","text",p,[coords[0]-2,coords[1]+2,nextCoords[6]+2,nextCoords[7]-2]);
			f.textSize=10;
			f.textFont=font.HelvB;
			f.fillColor=color.transparent;
			f.strokeColor=color.red;
			f.userName="Reference for the correct font size";
			f.setFocus();
			// nType 3 is a Yes/No/Cancel dialog: 2 = Cancel, 3 = No, 4 = Yes.
			var question=app.alert({
				cMsg: "Is this reference \""+theReference+"\" on page "+(p+1)+" in the correct font size?",
				cTitle: "Reference in 9-pt font size",
				nIcon: 2,
				nType: 3
			});
			if (question!=4) {
				// Not confirmed: the box only stays on the confirmed reference.
				doc.removeField("theBox");
				if (question==2) {
					cancel++;
					break;
				}
				continue;
			}
			height=wordHeight;
			sizeOK++;
		}
		if (Math.abs(wordHeight-height)<=HEIGHT_TOLERANCE) {
			references+="\r"+theReference+";"+(p+1);
			found++;
		}
	}
	if (cancel) break;
}
var df=new Date();
var ending=util.printd("mm/dd/yy – HH:MM:ss",df);
var temps=(df.valueOf()-d0.valueOf())/1000/60; // elapsed time in minutes
var theMinutes=Math.floor(temps);
var theSeconds=Math.floor((temps-theMinutes)*60*10)/10; // truncated to one decimal
var theTime="";
if (theMinutes>0) theTime=(theMinutes==1)?"1 minute":theMinutes+" minutes";
if (theSeconds>0) theTime+=" "+theSeconds+((theSeconds<2)?" second":" seconds");
theTime=theTime.replace(/^\s+|\s+$/gm,"");
var txt="Process starting: "+starting+"\rProcess ending: "+ending+"\rProcess duration: "+theTime+"\r\r";
if (found) {
	doc.createDataObject("References & Pages.csv", "");
	var oFile=util.streamFromString(references);
	doc.setDataObjectContents("References & Pages.csv", oFile);
	// The dot is escaped so that only a real ".pdf" extension is replaced.
	doc.saveAs(doc.path.replace(/\.pdf$/i," (With Attached .csv File).pdf"));
	doc.viewState={overViewMode:7};
	var plurial=found>1?"s":"";
	txt+=found+"/"+nbRef+" Reference"+plurial+" found at the correct font size.";
} else txt+="No reference found.";
console.clear();
console.println(txt);
app.alert(txt,3);

 

@+

Participant
June 13, 2023

Hi thank you for your response, I will give it a try and come back to you!

bebarth
Community Expert
Community Expert
June 13, 2023

Hi,

If it's a huge file, try with just a few pages...

@+

try67
Community Expert
Community Expert
June 10, 2023

This is possible, but it's quite tricky. The issue is there's no direct way to know a text's font size in a script, but it can be calculated (to some extent) using the quads array that defines the word's position on the page, but it's not a trivial task.

 

If you're interested in hiring a professional to create it for you, feel free to contact me privately by clicking my user-name and then on "Send a Message".

Participant
June 13, 2023

Hi, Thanks for your reply, I am afraid this solution is not applicable in this case but many thanks for your response.