• Global community
    • Language:
      • Deutsch
      • English
      • Español
      • Français
      • Português
  • 日本語コミュニティ
    Dedicated community for Japanese speakers
  • 한국 커뮤니티
    Dedicated community for Korean speakers
Exit
0

Text extraction algorithm

Community Expert ,
Mar 30, 2020 Mar 30, 2020

Copy link to clipboard

Copied

Hi All,

I need help with a text extraction algorithm. Given a string of text inside a larger string, I need to extract a string of a certain number of characters with my given string in the middle. I am going to illustrate it with some nonsense paragraphs, but hopefully it will show what I am trying to do. Here are three paragraphs with the given text "zzzz":

123456789 zzzz 1234567890
1 zzzz 12345 6789 1234567
1234 6789 12345678 zzzz 1

I want to extract 10 characters with the "zzzz" in the middle. If the "zzzz" can't be in the middle (as in the 2nd and 3rd paragraphs), I still want 10 characters. So, here is the result I want:

89 zzzz 12
1 zzzz 123
678 zzzz 1

Of course, there could be instances where the overall string contains fewer characters, but I want to set the maximum extraction to a specific number of characters (10 in this case). So, given the overall length of a container string, the length of the target string and its position in the container string, and the number of total characters to extract, I am looking for a general algorithm to extract the characters, keeping the target string as close to the center as possible.

 

I am using ExtendScript but even pseudocode would be helpful. Any ideas or pointers would be appreciated. Thank you very much. -Rick

TOPICS
Scripting

Views

721

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Community Expert ,
Mar 31, 2020 Mar 31, 2020

Copy link to clipboard

Copied

Dear Rick,

At least for the test cases this provides correct results:

main ();

/**
 * Demo driver: shows the extracted window for each sample paragraph in an
 * alert box, using "zzzz" as the target text.
 */
function main() {
var j;                       // declared locally so the loop index does not leak as a global
var values = ["123456789 zzzz 1234567890", "1 zzzz 12345 6789 1234567", "1234 6789 12345678 zzzz 1"] ;
var sConst = "zzzz";
  // Iterate over however many samples are listed instead of a hard-coded count.
  for (j = 0; j < values.length; j++) {
    alert (GetMiddlePart(values[j], sConst));
  }
} // --- end main

/**
 * Extracts a window of at most 10 characters from `string`, positioned so that
 * the first occurrence of `sConst` sits as close to the centre of the window
 * as the ends of `string` allow.
 *
 * @param {string} string  Container text to extract from.
 * @param {string} sConst  Target text that should end up near the middle.
 * @returns {string} The extracted window. The whole of `string` is returned
 *   when it is shorter than 10 characters. When `sConst` does not occur, the
 *   leading characters of `string` are returned. When `sConst` is longer than
 *   the window, the window starts at the beginning of `sConst`.
 */
function GetMiddlePart (string, sConst) {
var iLoc, lBox, lastStart, pad, start;
  lBox = 10;                                   // fixed window width
  iLoc = string.indexOf(sConst);               // -1 when the target is absent
  // Number of characters wanted to the left of the target so it is centred.
  pad = Math.max(0, Math.floor((lBox - sConst.length) / 2));
  // Right-most start that still yields a full window; 0 for short strings.
  lastStart = Math.max(0, string.length - lBox);
  // Centre on the target, then pull the window back inside the string.
  start = Math.min(Math.max(iLoc - pad, 0), lastStart);
  return string.slice(start, start + lBox);
} // --- end GetMiddlePart

k

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Community Expert ,
Apr 01, 2020 Apr 01, 2020

Copy link to clipboard

Copied

This may be more general:

 

main ();
/**
 * Demo driver: for each sample paragraph, shows the paragraph and the window
 * extracted around "zzzz" (window width 10) in an alert box.
 */
function main() {
var j;                       // declared locally so the loop index does not leak as a global
var values = ["123456789 zzzz 1234567890", "1 zzzz 12345 6789 1234567"
            , "1234 6789 12345678 zzzz 1", "a zzzz q", "ab zzzz yz"];
var sCore = "zzzz", lBox = 10;
  for (j = 0; j < values.length; j++) {
    alert (values[j] + "\n" + GetMiddlePart(values[j], sCore, lBox));
  }
} // --- end main

/**
 * Extracts a window of at most `lBox` characters from `string`, positioned so
 * that the first occurrence of `sCore` sits as close to the centre of the
 * window as the ends of `string` allow.
 *
 * @param {string} string  Container text to extract from.
 * @param {string} sCore   Target text that should end up near the middle.
 * @param {number} lBox    Maximum number of characters to extract.
 * @returns {string} The extracted window. The whole of `string` is returned
 *   when it is no longer than `lBox`; an empty string when `lBox` is not
 *   positive. When `sCore` does not occur, the leading `lBox` characters are
 *   returned. When `sCore` is longer than `lBox`, the window starts at the
 *   beginning of `sCore`.
 */
function GetMiddlePart (string, sCore, lBox) {
var iLoc, lastStart, pad, start;
  if (lBox >= string.length) {
    return string;                             // nothing to trim
  }
  if (lBox <= 0) {
    return "";                                 // no room for any character
  }
  iLoc = string.indexOf(sCore);                // -1 when the target is absent
  // Number of characters wanted to the left of the target so it is centred.
  pad = Math.max(0, Math.floor((lBox - sCore.length) / 2));
  // Right-most start that still yields a full window.
  lastStart = string.length - lBox;
  // Centre on the target, then pull the window back inside the string.
  start = Math.min(Math.max(iLoc - pad, 0), lastStart);
  return string.slice(start, start + lBox);
} // --- end GetMiddlePart

 

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Community Beginner ,
Apr 24, 2020 Apr 24, 2020

Copy link to clipboard

Copied

LATEST
// Extracts up to `resultLength` characters of `sourceString`, keeping the first
// occurrence of `searchString` as close to the centre of the extract as the
// ends of `sourceString` allow, and shows the extract in an alert box.
var sourceString = '1234 6789 12345678 zzzz 1';
var searchString = 'zzzz';
var resultLength = 10;

var numberOfCharsEitherSide;
var matchIndex, lastStart, start, result;

// Plain index search: the search text is taken literally, so characters that
// are special in a regular expression need no escaping.
matchIndex = sourceString.indexOf(searchString);

if (matchIndex === -1) {
  // Search text is absent: nothing to extract.
  result = '';
} else {
  // Whole number of characters wanted on each side of the search text.
  numberOfCharsEitherSide = Math.max(0, Math.floor((resultLength - searchString.length) / 2));
  // Right-most start that still yields a full-length extract; 0 when the
  // source is shorter than the requested length.
  lastStart = Math.max(0, sourceString.length - resultLength);
  // Centre on the match, then pull the window back inside the source string.
  start = Math.min(Math.max(matchIndex - numberOfCharsEitherSide, 0), lastStart);
  result = sourceString.slice(start, start + resultLength);
}

alert('result: ' + result);

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines