Copy link to clipboard
Copied
Hi All,
I need help with a text extraction algorithm. Given a string of text inside a larger string, I need to extract a string of a certain number of characters with my given string in the middle. I am going to illustrate it with some nonsense paragraphs, but hopefully it will illustrate what I am trying to do. Here are three paragraphs with the given text "zzzz":
123456789 zzzz 1234567890
1 zzzz 12345 6789 1234567
1234 6789 12345678 zzzz 1
I want to extract 10 characters with the "zzzz" in the middle. If the "zzzz" can't be in the middle (as in the 2nd and 3rd paragraphs), I still want 10 characters. So, here is the result I want:
89 zzzz 12
1 zzzz 123
678 zzzz 1
Of course, there could be instances where the overall string contains fewer characters, but I want to set the maximum extraction to a specific number of characters (10 in this case). So, given the overall length of a container string, the length of the target string and its position in the container string, and the number of total characters to extract, I am looking for a general algorithm to extract the characters, keeping the target string as close to the center as possible.
I am using ExtendScript but even pseudocode would be helpful. Any ideas or pointers would be appreciated. Thank you very much. -Rick
Copy link to clipboard
Copied
Dear Rick,
At least for the test cases this provides correct results:
main ();

/**
 * Demo driver: runs GetMiddlePart over the three sample paragraphs from the
 * question and shows each extracted window in an alert dialog.
 */
function main() {
    var values = ["123456789 zzzz 1234567890", "1 zzzz 12345 6789 1234567", "1234 6789 12345678 zzzz 1"];
    var sConst = "zzzz";
    var j; // declared locally so the loop counter does not leak as an implicit global

    // Iterate over however many samples there are instead of a hard-coded count.
    for (j = 0; j < values.length; j++) {
        alert(GetMiddlePart(values[j], sConst));
    }
} // --- end main
/**
 * Extracts a window of at most lBox characters from `string`, keeping the
 * first occurrence of `sConst` as close to the centre of the window as the
 * string boundaries allow.
 *
 * @param {String} string  Container text to extract from.
 * @param {String} sConst  Target text that should sit in the middle of the window.
 * @param {Number} [lBox]  Maximum number of characters to return; defaults to 10.
 * @returns {String} The whole container when it is not longer than lBox;
 *                   the leading lBox characters when the target is not found;
 *                   otherwise an lBox-character window around the target.
 */
function GetMiddlePart (string, sConst, lBox) {
    var iLoc, max, pad, start;

    if (lBox === undefined) {
        lBox = 10; // historical fixed window size of this function
    }

    max = string.length;
    if (max <= lBox) {
        // Nothing to trim: the container already fits into the window.
        return string;
    }

    iLoc = string.indexOf(sConst);
    if (iLoc < 0) {
        // Target absent: fall back to the leading window.
        return string.slice(0, lBox);
    }

    // Characters wanted to the left of the target. Never negative, so a target
    // longer than the window is cut from its own start.
    pad = Math.max(0, Math.floor((lBox - sConst.length) / 2));

    // Centre on the target, then clamp so the window stays inside the string
    // and still has the full lBox characters.
    start = Math.min(Math.max(iLoc - pad, 0), max - lBox);

    return string.slice(start, start + lBox);
} // --- end GetMiddlePart
k
Copy link to clipboard
Copied
This may be more general:
main ();

/**
 * Demo driver: runs GetMiddlePart over the sample strings (including two that
 * are not longer than the window) and shows each input together with its
 * extracted window in an alert dialog.
 */
function main() {
    var values = ["123456789 zzzz 1234567890", "1 zzzz 12345 6789 1234567"
        , "1234 6789 12345678 zzzz 1", "a zzzz q", "ab zzzz yz"];
    var sCore = "zzzz", lBox = 10;
    var j; // declared locally so the loop counter does not leak as an implicit global

    for (j = 0; j < values.length; j++) {
        alert(values[j] + "\n" + GetMiddlePart(values[j], sCore, lBox));
    }
} // --- end main
/**
 * Extracts a window of at most lBox characters from `string`, keeping the
 * first occurrence of `sCore` as close to the centre of the window as the
 * string boundaries allow.
 *
 * @param {String} string  Container text to extract from.
 * @param {String} sCore   Target text that should sit in the middle of the window.
 * @param {Number} lBox    Maximum number of characters to return.
 * @returns {String} The whole container when it is not longer than lBox;
 *                   the leading lBox characters when the target is not found;
 *                   otherwise an lBox-character window around the target.
 */
function GetMiddlePart (string, sCore, lBox) {
    var iLoc, lString, padLeft, start;

    lString = string.length;
    if (lBox >= lString) {
        // Nothing to trim: the container already fits into the window.
        return string;
    }

    iLoc = string.indexOf(sCore);
    if (iLoc < 0) {
        // Target absent: fall back to the leading window.
        return string.slice(0, lBox);
    }

    // Characters wanted to the left of the target. Never negative, so a target
    // longer than the window is cut from its own start.
    padLeft = Math.max(0, Math.floor((lBox - sCore.length) / 2));

    // Centre on the target (not on the container), then clamp so the window
    // stays inside the string and still has the full lBox characters.
    start = iLoc - padLeft;
    if (start > lString - lBox) {
        start = lString - lBox; // target sits towards the end
    }
    if (start < 0) {
        start = 0; // target sits towards the beginning
    }

    return string.slice(start, start + lBox);
} // --- end GetMiddlePart
Copy link to clipboard
Copied
// Extracts up to resultLength characters of sourceString with the first
// occurrence of searchString kept as close to the centre as possible, then
// shows the extracted text in an alert dialog.
var sourceString = '1234 6789 12345678 zzzz 1';
var searchString = 'zzzz';
var resultLength = 10;
var numberOfCharsEitherSide;
var matchIndex, windowStart;
var result = ''; // stays empty when searchString does not occur in sourceString

// Floor the halved difference so the padding is a whole number; never negative,
// so a search string longer than the window is cut from its own start.
numberOfCharsEitherSide = Math.max(0, Math.floor((resultLength - searchString.length) / 2));

// Plain index arithmetic instead of a growing regular expression: the search
// text needs no escaping, and short sources cannot cause an endless loop.
matchIndex = sourceString.indexOf(searchString);
if (matchIndex !== -1) {
    windowStart = matchIndex - numberOfCharsEitherSide;
    // Pull the window back when the match sits near the end of the source...
    windowStart = Math.min(windowStart, sourceString.length - resultLength);
    // ...and never start before the first character (also covers short sources).
    windowStart = Math.max(windowStart, 0);
    result = sourceString.slice(windowStart, windowStart + resultLength);
}

alert('result: ' + result);