Copy link to clipboard
Copied
Hi All,
I need help with a text extraction algorithm. Given a string of text inside a larger string, I need to extract a string of a certain number of characters with my given string in the middle. I am going to illustrate it with some nonsense paragraphs, which will hopefully show what I am trying to do. Here are three paragraphs with the given text "zzzz":
123456789 zzzz 1234567890
1 zzzz 12345 6789 1234567
1234 6789 12345678 zzzz 1
I want to extract 10 characters with the "zzzz" in the middle. If the "zzzz" can't be in the middle (as in the 2nd and 3rd paragraphs), I still want 10 characters. So, here is the result I want:
89 zzzz 12
1 zzzz 123
678 zzzz 1
Of course, there could be instances where the overall string contains fewer characters, but I want to set the maximum extraction to a specific number of characters (10 in this case). So, given the overall length of a container string, the length of the target string and its position in the container string, and the total number of characters to extract, I am looking for a general algorithm to extract the characters, keeping the target string as close to the center as possible.
I am using ExtendScript but even pseudocode would be helpful. Any ideas or pointers would be appreciated. Thank you very much. -Rick
Copy link to clipboard
Copied
Dear Rick,
At least for the test cases this provides correct results:
main ();

/**
 * Demo driver: runs GetMiddlePart on every sample paragraph and shows each
 * extracted window in an alert box.
 */
function main() {
  var values = ["123456789 zzzz 1234567890", "1 zzzz 12345 6789 1234567", "1234 6789 12345678 zzzz 1"] ;
  var sConst = "zzzz";
  var j; // declared locally so the counter does not leak into the global scope

  // Iterate over the actual number of samples instead of a hard-coded count.
  for (j = 0; j < values.length; j++) {
    alert (GetMiddlePart(values[j], sConst));
  }
} // --- end main
/**
 * Extracts a fixed-width window from `string`, keeping the first occurrence
 * of `sConst` as close to the centre of that window as possible.
 *
 * The window is positioned relative to the target itself (not the midpoint
 * of the container) and is then shifted just enough to stay inside the
 * container, so a target near either edge still yields a full-width result.
 *
 * @param {String} string  Container text to extract from.
 * @param {String} sConst  Target text that should sit in the middle.
 * @param {Number} [lBox]  Window width in characters; defaults to 10.
 * @returns {String} The extracted window. The whole container is returned
 *   when it is no longer than the window. When the target does not occur,
 *   the leading window is returned.
 */
function GetMiddlePart (string, sConst, lBox) {
  var width = (lBox === undefined) ? 10 : lBox;
  var max = string.length;
  var iLoc, pad, start, lastStart;

  if (width <= 0) {
    return "";
  }
  if (width >= max) {
    // Nothing to trim: the container already fits into the window.
    return string;
  }

  iLoc = string.indexOf(sConst);
  if (iLoc < 0) {
    // Target absent: fall back to the start of the container.
    iLoc = 0;
  }

  // Characters to keep in front of the target. With an odd amount of
  // padding the extra character goes to the right-hand side.
  pad = Math.floor((width - sConst.length) / 2);
  if (pad < 0) {
    pad = 0; // target longer than the window: start the window at the target
  }

  // Clamp the window so it never runs off either end of the container.
  lastStart = max - width;
  start = iLoc - pad;
  if (start < 0) {
    start = 0;
  } else if (start > lastStart) {
    start = lastStart;
  }

  return string.substring(start, start + width);
} // --- end GetMiddlePart
Copy link to clipboard
Copied
This may be more general:
main ();

/**
 * Demo driver: for every sample paragraph, shows the paragraph followed by
 * the window GetMiddlePart extracts around the target text.
 */
function main() {
  var values = ["123456789 zzzz 1234567890", "1 zzzz 12345 6789 1234567"
    , "1234 6789 12345678 zzzz 1", "a zzzz q", "ab zzzz yz"];
  var sCore = "zzzz", lBox = 10;
  var j; // declared locally so the counter does not leak into the global scope

  for (j = 0; j < values.length; j++) {
    alert (values[j] + "\n" + GetMiddlePart(values[j], sCore, lBox));
  }
} // --- end main
/**
 * Extracts a window of `lBox` characters from `string`, keeping the first
 * occurrence of `sCore` as close to the centre of that window as possible.
 *
 * The window is positioned relative to the target itself (not the midpoint
 * of the container) and is then shifted just enough to stay inside the
 * container, so a target near either edge still yields a full-width result.
 *
 * @param {String} string  Container text to extract from.
 * @param {String} sCore   Target text that should sit in the middle.
 * @param {Number} lBox    Window width in characters.
 * @returns {String} The extracted window. The whole container is returned
 *   when it is no longer than the window. When the target does not occur,
 *   the leading window is returned.
 */
function GetMiddlePart (string, sCore, lBox) {
  var lString = string.length;
  var iLoc, pad, start, lastStart;

  if (lBox >= lString) {
    // Nothing to trim: the container already fits into the window.
    return string;
  }
  if (lBox <= 0) {
    return "";
  }

  iLoc = string.indexOf(sCore);
  if (iLoc < 0) {
    // Target absent: fall back to the start of the container.
    iLoc = 0;
  }

  // Characters to keep in front of the target. With an odd amount of
  // padding the extra character goes to the right-hand side.
  pad = Math.floor((lBox - sCore.length) / 2);
  if (pad < 0) {
    pad = 0; // target longer than the window: start the window at the target
  }

  // Clamp the window so it never runs off either end of the container.
  lastStart = lString - lBox;
  start = iLoc - pad;
  if (start < 0) {
    start = 0;
  } else if (start > lastStart) {
    start = lastStart;
  }

  return string.substring(start, start + lBox);
} // --- end GetMiddlePart
Copy link to clipboard
Copied
// Extract up to resultLength characters from sourceString, keeping the first
// occurrence of searchString as close to the centre of the result as possible.
var sourceString = '1234 6789 12345678 zzzz 1';
var searchString = 'zzzz';
var resultLength = 10;
var numberOfCharsEitherSide;
var matchIndex, windowStart, lastStart, result;

// Whole characters only: floor the halved difference (not the search length),
// and never go negative when the search text is longer than the result.
numberOfCharsEitherSide = Math.max(0, Math.floor((resultLength - searchString.length) / 2));

// Plain index arithmetic replaces the growing-regex loop: it treats the search
// text literally, is not affected by line breaks in the source, always
// terminates, and never returns more than resultLength characters.
matchIndex = sourceString.indexOf(searchString);
if (matchIndex < 0) {
  result = null; // search text not present
} else {
  // Shift the window back inside the source when the match sits near an edge.
  lastStart = Math.max(0, sourceString.length - resultLength);
  windowStart = Math.min(Math.max(matchIndex - numberOfCharsEitherSide, 0), lastStart);
  result = sourceString.substring(windowStart, windowStart + resultLength);
}
alert('result: ' + result);
Get ready! An upgraded Adobe Community experience is coming in January.
Learn more