Regex to match date and initials

Explorer ,
May 31, 2018

Copy link to clipboard

Copied

I have a regex which should find a string pattern of any of the following formats

28.05.2018 SB RS CS

28-05-2018 SB RS CS

28/05/2018 SB RS CS

The regular expression matches the first example with the full stops in the date, but does not match the second and third examples with the dashes and forward slashes in the date.

Can anyone please advise how to correct the regular expression so that it matches a dash or a forward slash?

Any assistance will be most appreciated.

This is part of a script which matches the string and reports it back to the console.

// Walk the words on page 0 and report every run of 4 consecutive words that
// looks like "<date> XX XX XX", where the date separators may be ".", "-" or "/".

var ckWords4; // 4 words  to test

var re = new RegExp(/\d{1,2}[/.-]\d{1,2}[/.-]\d{2,4}\s([A-Z]{2,5})\s([A-Z]{2,5})\s([A-Z]{2,5})/);

var count; // count number of words

numWords = this.getPageNumWords(0); // number of words on page

// loop through the words on page

for (var j = 0; j < numWords-1; j++) {

    // join 4 consecutive words with single spaces
    ckWords4 = this.getPageNthWord(0, j) + ' ' + this.getPageNthWord(0, j + 1) + ' ' + this.getPageNthWord(0, j + 2) + ' ' + this.getPageNthWord(0, j + 3); // test 4 words

    // the test must be on its own line; appended to the comment above it never runs
    if (ckWords4.match(re)) {

        count = ckWords4.split(/\s+/).length;

        console.println(ckWords4 + " " + count);

    }

}

Adobe Community Professional
Correct answer by gkaiseril | Adobe Community Professional

Your script does not exclude strings like "05/25/018" which is not a valid date.

When testing your regular expression, I get only one match when there would appear to be 5 matches.

// Scan page 0 for strings such as "28.05.2018 SB RS CS", "28-05-2018 SB RS CS"
// or "28/05/2018 SB RS CS" and print each match, plus a total, to the console.
//
// "-" and "/" count as punctuation that ends a word, so a date written with
// those separators comes back from getPageNthWord as three words. Two candidate
// strings are therefore tested at each position: a 4-word window joined with
// spaces, and a 6-word window rebuilt with the characters that follow each word.

var ckWords4; // 4 words  to test;
var ckWords6; // 6 words to test;

// Group 1: day and month with their separators; group 2: a 2- or 4-digit year;
// group 3: three blocks of 2-5 capital letters. The anchors make a 3-digit
// year such as "05/25/018" fail.
var re = /^(\d{1,2}[/.-]\d{1,2}[/.-])(\d{2}|\d{4})(\s[A-Z]{2,5}\s[A-Z]{2,5}\s[A-Z]{2,5})$/;

var count; // count number of words
var nMatch = 0; // number of matches found
var cMatched; // text of the current match
var aMatch; // result of re.exec for the current position, or null

console.show();
console.clear();

var numWords = this.getPageNumWords(0); // number of words on page

// loop through the words on page; stop while 4 words are still available so
// getPageNthWord is never asked for an index past the last word
for (var j = 0; j + 3 < numWords; j++) {

    // 4 word string;
    ckWords4 = this.getPageNthWord(0, j) + ' ' + this.getPageNthWord(0, j + 1) + ' ' +
        this.getPageNthWord(0, j + 2) + ' ' + this.getPageNthWord(0, j + 3);
    aMatch = re.exec(ckWords4);

    // six word string; only needed when the 4-word test failed and 6 words remain.
    // bStrip = false keeps the punctuation/white space after each word; the last
    // word is stripped so the string can end on the "$" anchor.
    if (aMatch === null && j + 5 < numWords) {
        ckWords6 = this.getPageNthWord(0, j, false) + this.getPageNthWord(0, j + 1, false) +
            this.getPageNthWord(0, j + 2, false) + this.getPageNthWord(0, j + 3, false) +
            this.getPageNthWord(0, j + 4, false) + this.getPageNthWord(0, j + 5, true);
        aMatch = re.exec(ckWords6);
    }

    if (aMatch !== null) {
        // the pattern is anchored and its groups cover the whole string, so the
        // full match equals group 1 + group 2 + group 3
        cMatched = aMatch[0];
        nMatch++;
        count = cMatched.split(/\s+/).length;
        console.println("words: " + cMatched + " " + count);
    } // end RegExp test true;

} // end words on page loop;

console.println("Matches: " + nMatch);

TOPICS
Acrobat SDK and JavaScript

Views

417

Likes

Translate

Translate

Report

Report
Community Guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more

Regex to match date and initials

Explorer ,
May 31, 2018

Copy link to clipboard

Copied

I have a regex which should find a string pattern of any of the following formats

28.05.2018 SB RS CS

28-05-2018 SB RS CS

28/05/2018 SB RS CS

The regular expression matches the first example with the full stops in the date, but does not match the second and third examples with the dashes and forward slashes in the date.

Can anyone please advise how to correct the regular expression so that it matches a dash or a forward slash?

Any assistance will be most appreciated.

This is part of a script which matches the string and reports it back to the console.

// Walk the words on page 0 and report every run of 4 consecutive words that
// looks like "<date> XX XX XX", where the date separators may be ".", "-" or "/".

var ckWords4; // 4 words  to test

var re = new RegExp(/\d{1,2}[/.-]\d{1,2}[/.-]\d{2,4}\s([A-Z]{2,5})\s([A-Z]{2,5})\s([A-Z]{2,5})/);

var count; // count number of words

numWords = this.getPageNumWords(0); // number of words on page

// loop through the words on page

for (var j = 0; j < numWords-1; j++) {

    // join 4 consecutive words with single spaces
    ckWords4 = this.getPageNthWord(0, j) + ' ' + this.getPageNthWord(0, j + 1) + ' ' + this.getPageNthWord(0, j + 2) + ' ' + this.getPageNthWord(0, j + 3); // test 4 words

    // the test must be on its own line; appended to the comment above it never runs
    if (ckWords4.match(re)) {

        count = ckWords4.split(/\s+/).length;

        console.println(ckWords4 + " " + count);

    }

}

Adobe Community Professional
Correct answer by gkaiseril | Adobe Community Professional

Your script does not exclude strings like "05/25/018" which is not a valid date.

When testing your regular expression, I get only one match when there would appear to be 5 matches.

// Scan page 0 for strings such as "28.05.2018 SB RS CS", "28-05-2018 SB RS CS"
// or "28/05/2018 SB RS CS" and print each match, plus a total, to the console.
//
// "-" and "/" count as punctuation that ends a word, so a date written with
// those separators comes back from getPageNthWord as three words. Two candidate
// strings are therefore tested at each position: a 4-word window joined with
// spaces, and a 6-word window rebuilt with the characters that follow each word.

var ckWords4; // 4 words  to test;
var ckWords6; // 6 words to test;

// Group 1: day and month with their separators; group 2: a 2- or 4-digit year;
// group 3: three blocks of 2-5 capital letters. The anchors make a 3-digit
// year such as "05/25/018" fail.
var re = /^(\d{1,2}[/.-]\d{1,2}[/.-])(\d{2}|\d{4})(\s[A-Z]{2,5}\s[A-Z]{2,5}\s[A-Z]{2,5})$/;

var count; // count number of words
var nMatch = 0; // number of matches found
var cMatched; // text of the current match
var aMatch; // result of re.exec for the current position, or null

console.show();
console.clear();

var numWords = this.getPageNumWords(0); // number of words on page

// loop through the words on page; stop while 4 words are still available so
// getPageNthWord is never asked for an index past the last word
for (var j = 0; j + 3 < numWords; j++) {

    // 4 word string;
    ckWords4 = this.getPageNthWord(0, j) + ' ' + this.getPageNthWord(0, j + 1) + ' ' +
        this.getPageNthWord(0, j + 2) + ' ' + this.getPageNthWord(0, j + 3);
    aMatch = re.exec(ckWords4);

    // six word string; only needed when the 4-word test failed and 6 words remain.
    // bStrip = false keeps the punctuation/white space after each word; the last
    // word is stripped so the string can end on the "$" anchor.
    if (aMatch === null && j + 5 < numWords) {
        ckWords6 = this.getPageNthWord(0, j, false) + this.getPageNthWord(0, j + 1, false) +
            this.getPageNthWord(0, j + 2, false) + this.getPageNthWord(0, j + 3, false) +
            this.getPageNthWord(0, j + 4, false) + this.getPageNthWord(0, j + 5, true);
        aMatch = re.exec(ckWords6);
    }

    if (aMatch !== null) {
        // the pattern is anchored and its groups cover the whole string, so the
        // full match equals group 1 + group 2 + group 3
        cMatched = aMatch[0];
        nMatch++;
        count = cMatched.split(/\s+/).length;
        console.println("words: " + cMatched + " " + count);
    } // end RegExp test true;

} // end words on page loop;

console.println("Matches: " + nMatch);

TOPICS
Acrobat SDK and JavaScript

Views

418

Likes

Translate

Translate

Report

Report
Community Guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
May 31, 2018 0
Most Valuable Participant ,
Jun 01, 2018

Copy link to clipboard

Copied

My guess is that the dashes and slashes cause the word to be split up, so that each part of it is returned separately by getPageNthWord, which is why it's not working for you. Try running a script that outputs all the words in the page to the console and you'll see if that's the issue.

Likes

Translate

Translate

Report

Report
Community Guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
Reply
Loading...
Jun 01, 2018 3
Adobe Community Professional ,
Jun 01, 2018

Copy link to clipboard

Copied

Have you reviewed the Acrobat JS documentation about getPageNthWord?

You have only used the first 2 parameters, page and nth word. The third is the option to strip the white space and punctuation that follows the returned word. By default this is set to true, so the returned word does not include this character. It appears you want to test that character, so you need to set this to false. Also, you could use the console.println command to print the returned words to the console so you can see what you are trying to examine. It is also possible to include conditional testing of characters within the RegExp string. Both the "-" and "/" characters are considered punctuation characters and thus indicate the end of a word. So if your string has these characters within it, the number of words you will need to search increases.

Likes

Translate

Translate

Report

Report
Community Guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
Reply
Loading...
Jun 01, 2018 3
Adobe Community Professional ,
Jun 03, 2018

Copy link to clipboard

Copied

Your script does not exclude strings like "05/25/018" which is not a valid date.

When testing your regular expression, I get only one match when there would appear to be 5 matches.

// Scan page 0 for strings such as "28.05.2018 SB RS CS", "28-05-2018 SB RS CS"
// or "28/05/2018 SB RS CS" and print each match, plus a total, to the console.
//
// "-" and "/" count as punctuation that ends a word, so a date written with
// those separators comes back from getPageNthWord as three words. Two candidate
// strings are therefore tested at each position: a 4-word window joined with
// spaces, and a 6-word window rebuilt with the characters that follow each word.

var ckWords4; // 4 words  to test;
var ckWords6; // 6 words to test;

// Group 1: day and month with their separators; group 2: a 2- or 4-digit year;
// group 3: three blocks of 2-5 capital letters. The anchors make a 3-digit
// year such as "05/25/018" fail.
var re = /^(\d{1,2}[/.-]\d{1,2}[/.-])(\d{2}|\d{4})(\s[A-Z]{2,5}\s[A-Z]{2,5}\s[A-Z]{2,5})$/;

var count; // count number of words
var nMatch = 0; // number of matches found
var cMatched; // text of the current match
var aMatch; // result of re.exec for the current position, or null

console.show();
console.clear();

var numWords = this.getPageNumWords(0); // number of words on page

// loop through the words on page; stop while 4 words are still available so
// getPageNthWord is never asked for an index past the last word
for (var j = 0; j + 3 < numWords; j++) {

    // 4 word string;
    ckWords4 = this.getPageNthWord(0, j) + ' ' + this.getPageNthWord(0, j + 1) + ' ' +
        this.getPageNthWord(0, j + 2) + ' ' + this.getPageNthWord(0, j + 3);
    aMatch = re.exec(ckWords4);

    // six word string; only needed when the 4-word test failed and 6 words remain.
    // bStrip = false keeps the punctuation/white space after each word; the last
    // word is stripped so the string can end on the "$" anchor.
    if (aMatch === null && j + 5 < numWords) {
        ckWords6 = this.getPageNthWord(0, j, false) + this.getPageNthWord(0, j + 1, false) +
            this.getPageNthWord(0, j + 2, false) + this.getPageNthWord(0, j + 3, false) +
            this.getPageNthWord(0, j + 4, false) + this.getPageNthWord(0, j + 5, true);
        aMatch = re.exec(ckWords6);
    }

    if (aMatch !== null) {
        // the pattern is anchored and its groups cover the whole string, so the
        // full match equals group 1 + group 2 + group 3
        cMatched = aMatch[0];
        nMatch++;
        count = cMatched.split(/\s+/).length;
        console.println("words: " + cMatched + " " + count);
    } // end RegExp test true;

} // end words on page loop;

console.println("Matches: " + nMatch);

Likes

Translate

Translate

Report

Report
Community Guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
Reply
Loading...
Jun 03, 2018 1
Jo_2013 LATEST
Explorer ,
Jun 04, 2018

Copy link to clipboard

Copied

Thank you so much for being very helpful, the explanation you gave made it easy for me to understand where the changes needed to be made to the script in regards to punctuation. Your example for modifying the script was fantastic and the regex now works and finds the words with the forward slash, full stop or dash. Much appreciated, have a great day.

Likes

Translate

Translate

Report

Report
Community Guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
Reply
Loading...
Jun 04, 2018 0