Copy link to clipboard
Copied
Hi
I am in a difficult situation at work: a book of 600 InDesign (indd) pages without any index tags. I have a list of names that need to be indexed in a simple way: name – pages...
List looks like:
Strayhorn, Billy
Stresemann, Gustav
Strickland, Edward
Strindberg, August
Strobel, Heinrich
Strode, Rosamund
Subotnick, Morton
Swingler, Randall
Szymanowski, Karol
The result needs to look like this:
Swingler, Randall 55, 65-66, 78, 90
I need a script that searches for the entries in this word list and creates an index list. The search has to be reversed: "Strayhorn, Billy" is probably written as "Billy Strayhorn" in the book, and sometimes only the first name is used. BUT the script needs to ignore any letters that are not explicitly in the list entry (the book is in Croatian). For example, the word list contains "Strayhorn, Billy": the script needs to find every occurrence of "Billy Strayhorn" on a page in any combination, such as Billy(s) Strayhorn(s), and write the page numbers next to the entry.
I found this life-saver script, but it needs to be modified for the needs described above. Any help is welcome...
//DESCRIPTION: Index direct
// Peter Kahrel -- www.kahrel.plus.com
// ExtendScript preprocessor directives: target application and the name of
// the script engine this script runs in.
#target indesign;
#targetengine index_direct;
// Guard: at least two documents must be open and the selection's parentStory
// must be a Story; otherwise errorM shows the message and stops the script.
if (app.documents.length < 2 || app.selection.length == 0 || app.selection[0].parentStory.constructor.name != "Story")
errorM ("Select a text frame or an insertion point\r(and open two or more documents).");
// Run the script on the first document in app.documents and report any
// error together with the line number it was raised on.
try {index_independent (app.documents[0])}
catch (e) {alert (e.message + "\r(line " + e.line + ")")};
//=======================================================================
// Entry point: collect the settings with get_data (doc), then hand them
// straight to create_index.
function index_independent (doc)
{
    create_index (get_data (doc));
}
// Build the index in place: for every paragraph (topic) of the selected
// story, look the topic up in the other open documents and append the page
// numbers that were found to that paragraph.
//   obj: settings object as returned by get_data (topic_separator, mark,
//        section_markers and the search options are read here or by helpers).
// Side effects: edits the selected story, disables redraw, and shows a
// progress palette through the global 'mess' (also used by make_topic and
// add_sections).
function create_index (obj)
{
    app.scriptPreferences.enableRedraw = false;
    var top_text, pages, topic;
    check_list (app.activeDocument);
    grep_settings (obj); // grep_settings MUST follow check_list
    // get the topics from the concordance list as paragraph objects
    var tops = app.selection[0].parentStory.paragraphs;
    // get the names of all open documents (creates array of doc. names)
    var docs = app.documents.everyItem().name;
    // and delete current document (the concordance list) from the array (but it will stay open)
    docs.shift();
    // initialise message window ('mess' is deliberately global, see above)
    mess = createmessagewindow (40);
    for (var i = 0; i < tops.length; i++)
    {
        // work on ONE paragraph per pass, not on the whole collection
        topic = tops[i];
        // create text string from topic
        top_text = make_topic (topic, obj);
        // get page numbers of the topic from all open docs
        // (get_pages returns an array of 0 or 1 names, or a formatted string)
        pages = get_pages (docs, top_text, obj);
        // If any, append to topic in concordance list.
        // The last paragraph gets them at insertion point -1; every other
        // paragraph at -2, i.e. in front of its closing return.
        if (pages.length > 0)
        {
            if (i == tops.length-1)
                topic.insertionPoints[-1].contents = obj.topic_separator + pages;
            else
                topic.insertionPoints[-2].contents = obj.topic_separator + pages;
        }
        else
            if (obj.mark) topic.strikeThru = true; // flag topics without page references
    }
    if (obj.section_markers)
        add_sections (tops);
    mess.parent.close();
}
// Insert a section heading (the upper-cased first word character) wherever
// two neighbouring paragraphs start with a different character, plus one
// heading in front of the very first paragraph.
//   par: the paragraphs of the index list.
// Walks the list backwards so that inserted headings do not shift the
// paragraphs still to be visited. Relies on the global 'mess' (progress text).
function add_sections (par)
{
    mess.text = 'Adding sections...';
    app.findGrepPreferences = null;
    // lazy quantifier: the first match is the first single word character
    app.findGrepPreferences.findWhat = '\\w+?';
    var ch1, ch2;
    for (var i = par.length-2; i >= 0; i--)
    {
        try
        {
            ch1 = par[i].findGrep()[0].contents.toUpperCase();
            ch2 = par[i+1].findGrep()[0].contents.toUpperCase();
            if (ch1 != ch2)
                par[i].insertionPoints[-1].contents = ch2+'\r';
        }
        // best effort: a paragraph without any word character yields no match
        // and is simply skipped
        catch (_) {}
    }
    try {par[0].insertionPoints[0].contents = par[0].findGrep()[0].contents.toUpperCase()+'\r';} catch (_){}
}
// Turn one topic paragraph into the GREP expression that is searched for.
//   t:   object whose 'contents' is the paragraph text
//   obj: settings; only obj.case_sensitive is read
// Also shows the topic in the progress palette (global 'mess'), which cannot
// be done anywhere else.
function make_topic (t, obj)
{
    // the text without its trailing return is what the user gets to see
    var shown = t.contents.replace (/\r$/, "");
    mess.text = shown;
    // cut everything from the first comma or "_(" onwards, then keep only
    // the last "__"-separated level (the subtopic, if there is one)
    var levels = shown.replace (/(,|_\().+$/, "").split ("__");
    // whole-word-only search
    var pattern = "\\b" + levels[levels.length-1] + "\\b";
    // "(?i)" switches case sensitivity off when requested
    return obj.case_sensitive == false ? "(?i)" + pattern : pattern;
}
// Collect the page names on which topic 't' occurs in all listed documents.
//   docs: array of document names
//   t:    GREP expression (see make_topic)
//   obj:  settings, passed on to get_one_doc and rerange
// Returns an array with 0 or 1 page names, or -- when there is more than one
// hit -- the sorted, de-duplicated, formatted string produced by rerange.
function get_pages (docs, t, obj)
{
    var pages = [];
    for (var i = 0; i < docs.length; i++)
    {
        // look the document up by ITS name, not by the whole name array
        var temp = get_one_doc (app.documents.item (docs[i]), t, obj);
        if (temp.length > 0)
            pages = pages.concat (temp);
    }
    // sort and remove duplicates
    if (pages.length > 1)
        pages = rerange (pages, obj);
    return pages
}
// Find every occurrence of GREP expression 't' in document 'doc' and return
// the names of the pages they are on (duplicates included, unsorted).
//   obj: settings, used by check_style to filter hits by paragraph style.
// Hits whose page cannot be determined (find_page returns null/undefined)
// are left out.
function get_one_doc (doc, t, obj)
{
    var page, array = [];
    app.findGrepPreferences.findWhat = t;
    var pp = doc.findGrep();
    for (var i = 0; i < pp.length; i++)
    {
        // test and locate the individual hit, not the array of hits
        if (check_style (pp[i], obj))
        {
            page = find_page (pp[i]);
            if (page != null)
                array.push (page.name);
        }
    }
    return array
}
// Work out the page an object is on by climbing up its parents.
// Returns the object's own parentPage property if it has one, the object
// itself if it is a Page, null if anything throws along the way, and
// undefined when the parent is of a kind that is not handled here.
function find_page (o)
{
    try
    {
        if (o.hasOwnProperty ("parentPage")) return o.parentPage;
        if (o.constructor.name == "Page") return o;
        var owner = o.parent;
        var kind = owner.constructor.name;
        // the parent is the page itself
        if (kind == "Page") return owner;
        // containers that are simply climbed
        if (kind == "Character" || kind == "Table" || kind == "TextFrame" || kind == "Group")
            return find_page (owner);
        // containers that need a detour to something that has a page
        if (kind == "Cell") return find_page (owner.texts[0].parentTextFrames[0]);
        if (kind == "Story") return find_page (o.parentTextFrames[0]);
        if (kind == "Footnote") return find_page (owner.storyOffset);
    }
    catch (_) {return null}
}
// Decide whether a hit counts, based on the name of its paragraph style.
//   w:   object with appliedParagraphStyle.name
//   obj: settings; selected_styles is a "£$"-delimited list of style names,
//        include is 0 (exclude the listed styles) or 1 (include only them)
// Returns true when the hit counts, undefined otherwise.
function check_style (w, obj)
{
    var chosen = obj.selected_styles;
    // no styles selected: every hit counts
    if (chosen == "") return true;
    if (obj.include == 0)
    {
        // exclude mode: the hit counts when its style is NOT listed
        if (chosen.indexOf ("£$"+w.appliedParagraphStyle.name+"£$") < 0) return true;
    }
    if (obj.include == 1)
    {
        // include mode: the hit counts only when its style IS listed
        if (chosen.indexOf ("£$"+w.appliedParagraphStyle.name+"£$") > -1) return true;
    }
}
// Tidy the word list before it is used: strip trailing white space from
// every paragraph and collapse runs of spaces to a single space.
//   doc: the document holding the word list (changeGrep is run on all of it).
// Side effect: resets the find/change GREP preferences (via set_grep).
function check_list (doc)
{
    set_grep ("\\s+$", "", {}); // remove trailing space
    doc.changeGrep();
    // Collapse runs of two or more spaces to ONE space. Replacing them with
    // nothing would glue the neighbouring words together ("Van  Gogh" ->
    // "VanGogh") and the topic would never be found.
    set_grep ("\\x20\\x20+", " ", {});
    doc.changeGrep();
}
// Open a small palette with one (initially empty) text line, used as a
// progress display.
//   le: width of the text line in characters
// Returns the statictext control; its 'parent' is the palette window.
// The window and the control are stored in the globals 'dlg' and 'txt'.
function createmessagewindow( le )
{
    dlg = new Window ('palette');
    txt = dlg.add ('statictext', undefined, "");
    // layout properties, set before the window is shown
    dlg.alignChildren = ['left', 'top'];
    txt.characters = le;
    dlg.show();
    return txt
}
//=================================================================
// Sort, remove duplicates, and range page numbers.
//   pagenum_array: page names (arabic and/or roman)
//   obj:           settings, passed on to sort_range
// Returns ONE string: the roman numbers first, then the arabic ones.
function rerange (pagenum_array, obj)
{
    // de-duplicate, then split into a roman and an arabic list
    var lists = split_roman_arabic (remove_duplicates (pagenum_array));
    var arabic_part = sort_range (lists.arabic, obj).join (", ");
    var roman_part = lists.roman;
    if (roman_part.length > 1)
    {
        // roman -> arabic, sort/range, then back to roman as a string
        roman_part = sort_range (roman_to_arabic (roman_part), obj);
        roman_part = arabic_to_roman (roman_part.join (", "));
    }
    // roman_part may still be an array here (0 or 1 entries); the string
    // concatenations below turn it into text -- counter-intuitive, but
    // necessary
    if (roman_part.length > 0 && arabic_part.length > 0) roman_part += ", ";
    return roman_part + arabic_part;
}
// Sort page numbers numerically (in place) and, when obj.range_pages is
// true, let apply_page_ranges combine them into ranges.
// Returns the sorted array itself, or whatever apply_page_ranges returns.
function sort_range (array, obj)
{
    array.sort (sort_num);
    return obj.range_pages == true ? apply_page_ranges (array, obj) : array;
}
// Split a list of page names into roman and arabic numbers.
//   array: page names
// Returns a two-element object {roman: [...], arabic: [...]}. An entry
// counts as arabic when it consists only of digits, hyphens and en-dashes;
// everything else goes into the roman list. The order within each list is
// the order of the input.
function split_roman_arabic (array)
{
    var roman = [];
    var arab = [];
    var arabic_only = /^[-\u2013\d]+$/;
    for (var i = 0; i < array.length; i++)
    {
        // classify each entry on its own
        if (arabic_only.test (array[i]))
            arab.push (array[i]);
        else
            roman.push (array[i]);
    }
    return {roman: roman, arabic: arab}
}
//~ function arabic_to_roman (array)
//~ {
//~ for (var i = array.length-1; i > -1; i--)
//~ array = arabic2roman (array);
//~ return array
//~ }
// Convert every number in a formatted page string (e.g. "3, 5-7") to roman
// with arabic2roman; separators and dashes are left untouched.
function arabic_to_roman (s)
{
    var each_number = /\w+/g;
    return s.replace (each_number, function (num) {return arabic2roman (num)});
}
// Convert every entry of an array of roman numbers to its arabic value with
// roman2arabic. The array is changed in place and returned.
function roman_to_arabic (array)
{
    for (var i = array.length-1; i > -1; i--)
        array[i] = roman2arabic (array[i]); // convert the entry, keep the array
    return array;
}
// Comparator for Array.sort: ascending numeric order. The subtraction
// converts numeric strings to numbers.
function sort_num (a, b)
{
    var difference = a - b;
    return difference;
}
//~ function sort_roman (a, b) {return roman2arabic (a) - roman2arabic (b)}
// Convert an arabic number (number or digit string) below 10000 to a
// lower-case roman numeral. Returns "" for anything that does not compare
// as smaller than 10000.
function arabic2roman (arab)
{
    var roman = "";
    if (arab < 10000)
    {
        // one row per decimal position: units, tens, hundreds, thousands
        var rom = [["","i","ii","iii","iv","v","vi","vii","viii","ix"],
                   ["","x","xx","xxx","xl","l","lx","lxx","lxxx","xc"],
                   ["","c","cc","ccc","cd","d","dc","dcc","dccc","cm"],
                   ["","m","mm","mmm","4m","5m","6m","7m","8m","9m"]];
        // reverse the digits so that position i selects row i
        var digits = String (arab).split("").reverse().join("");
        for (var i = 0; i < digits.length; i++)
            roman = rom[i][digits.charAt(i)] + roman;
    }
    return roman
}
// Convert a lower-case roman numeral (string) to its arabic value.
// Reads the numeral from right to left: a symbol that is smaller than its
// right-hand neighbour is subtracted, any other symbol is added.
function roman2arabic (roman)
{
    var i;
    var rom2arab = {i: 1, v: 5, x: 10, l: 50, c: 100, d: 500, m: 1000};
    // start with the value of the last symbol
    var arabic = rom2arab [roman.charAt (roman.length-1)];
    for (i = roman.length-2; i > -1; i--)
    {
        // compare the symbol at i with the one to its right
        if (rom2arab [roman.charAt (i)] < rom2arab [roman.charAt (i+1)])
            arabic -= rom2arab [roman.charAt (i)];
        else
            arabic += rom2arab [roman.charAt (i)];
    }
    return arabic
}
// Return a new array with every distinct value of 'array' once; undefined
// entries are dropped. The input is read from the end, so the result is in
// reverse order of first appearance (the caller sorts it afterwards).
function remove_duplicates (array)
{
    var temp = [];
    // plain object as a lookup; comparing with 'true' keeps inherited
    // properties such as "constructor" from counting as already seen
    var seen = {};
    for (var i = array.length-1; i > -1; i--)
    {
        if (array[i] != undefined && seen[array[i]] !== true)
        {
            seen[array[i]] = true;
            temp.push (array[i]);
        }
    }
    return temp
} // remove_duplicates
// Combine consecutive page numbers into ranges.
//   array: page numbers sorted ascending (numbers or numeric strings)
//   obj:   settings; obj.dash is placed between first and last page of a
//          range, obj.tolerance is the number of missing pages allowed
//          inside a range (0 = strictly consecutive pages only)
// Returns a new array in which every run is replaced by "first<dash>last".
function apply_page_ranges (array, obj)
{
    // The dialog delivers the tolerance as TEXT ("1"); convert it first,
    // otherwise "1"+1 is "11" and almost everything ends up in one range.
    var tolerance = Number (obj.tolerance) + 1;
    var temp = [];
    var range = false;
    for (var i = 0; i < array.length; i++)
    {
        temp.push (array[i]);
        // advance to the last page that still belongs to this run
        while (array[i+1] - array[i] <= tolerance)
            {i++; range = true}
        if (range)
            temp[temp.length-1] += obj.dash + array[i];
        range = false;
    }
    return temp;
} // apply_page_ranges
// Undo ranging (and digit dropping): every "a-b" / "a–b" entry is replaced
// by the individual page numbers a..b.
//   array: page entries (strings)
// Returns a new array of strings.
function unrange (array)
{
    // restore dropped leading digits: 123-6 > 123-126
    function undrop (from, to) {return from.slice (0, from.length-to.length) + to}

    // replace callback: whole match, first page, last page
    function expand_num (whole, start, stop)
    {
        if (start.length > stop.length)
            stop = undrop (start, stop);
        var first = +start, last = +stop, nums = [];
        for (var n = first; n < last; n++)
            nums.push (n);
        nums.push (last);
        return nums.join (",");
    } // expand_num

    var joined = array.join (",");
    return joined.replace (/(\d+)[-\u2013](\d+)/g, expand_num).split (",");
} // unrange
// End rerange ============================================================
// Show message 'm' in an alert box, then call exit () to stop the script.
function errorM (m)
{
alert (m);
exit ();
}
// Prepare a GREP find/change.
//   find, replace: the find-what and change-to expressions
//   options:       optional object; PS / CS set the paragraph / character
//                  style to search for, and the mere presence of FN, M, HL,
//                  LL, LS switches on footnotes, master pages, hidden layers,
//                  locked layers and locked stories
// Resets the find and change GREP preferences before applying the settings.
function set_grep (find, replace, options)
{
    var opts = (options == undefined) ? {} : options;
    // a key counts as given whenever its value is not undefined
    function given (key) {return opts[key] !== undefined}

    app.changeGrepPreferences = null;
    app.findGrepPreferences = null;
    app.findGrepPreferences.findWhat = find;
    app.changeGrepPreferences.changeTo = replace;
    if (given ("PS")) app.findGrepPreferences.appliedParagraphStyle = opts.PS;
    if (given ("CS")) app.findGrepPreferences.appliedCharacterStyle = opts.CS;

    var scope = {
        includeFootnotes: given ("FN"),
        includeMasterPages: given ("M"),
        includeHiddenLayers: given ("HL"),
        includeLockedLayersForFind: given ("LL"),
        includeLockedStoriesForFind: given ("LS")
    };
    app.findChangeGrepOptions.properties = scope;
}
// Reset the find/change GREP preferences and set the search scope from the
// settings object.
//   o: settings; incFN, incHL, incLL and incLS are copied to the footnote,
//      hidden-layer, locked-layer and locked-story options
function grep_settings (o)
{
    app.changeGrepPreferences = null;
    app.findGrepPreferences = null;
    var scope = {};
    scope.includeFootnotes = o.incFN;
    scope.includeHiddenLayers = o.incHL;
    scope.includeLockedLayersForFind = o.incLL;
    scope.includeLockedStoriesForFind = o.incLS;
    app.findChangeGrepOptions.properties = scope;
}
// End create_index ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Show the settings dialog and return the chosen settings.
//   doc: kept for the caller's sake; the paragraph styles offered in the
//        dialog are read from app.documents[1].
// Returns an object with selected_styles ("£$"-delimited names or ""),
// include (0 = exclude the selected styles, 1 = include only them),
// case_sensitive, topic_separator and dash (the actual characters),
// range_pages, section_markers, mark, tolerance (text) and the
// incLL/incHL/incLS/incFN search options.
// The choices are stored in a history file next to the script and restored
// on the next run. Cancelling the dialog stops the script via exit ().
function get_data (doc)
{
    var history_file = script_dir() + "/index_direct.txt";
    var history = read_history (history_file);
    // get the styles in the other doc just in case we want to load them (can't do that after the dialog is displayed)
    var parstyles = app.documents[1].paragraphStyles.everyItem().name;
    // remove the first item ([No Paragraph])
    parstyles.shift();

    var w = new Window ("dialog", "Create independent index", undefined, {closeButton: false});
    w.alignChildren = "left";
    var panel = w.add ("panel", undefined, "Select paragraph styles");
    panel.orientation = "row";
    // list1: the selected styles (empty to begin with); list2: the styles on offer
    var list1 = panel.add ("listbox", undefined, [], {multiselect: true});
    var addbuttons = panel.add ("group");
    addbuttons.orientation = "column";
    addbuttons.alignChildren = "fill";
    var add_ = addbuttons.add ("button", undefined, "<---Add selected");
    var add_all = addbuttons.add ("button", undefined, "<---Add all");
    var remove_ = addbuttons.add ("button", undefined, "Remove selected --->");
    var remove_all = addbuttons.add ("button", undefined, "Remove all --->");
    var load_styles = addbuttons.add ("button", undefined, "Load styles");
    var sort_styles = addbuttons.add ("button", undefined, "Sort styles");
    var list2 = panel.add ("listbox", undefined, parstyles, {multiselect: true});
    list1.preferredSize = list2.preferredSize = [200, 200];

    var clude = w.add ("panel");
    clude.orientation = "row";
    clude.alignment = "fill";
    clude.alignChildren = "left";
    clude.add ("radiobutton", undefined, "\u00A0Exclude the selected paragraph styles");
    clude.add ("radiobutton", undefined, "\u00A0Include ONLY the selected paragraph styles");
    clude.children[0].value = true;

    var group23 = w.add ("group");
    group23.alignChildren = "top";
    var group2 = group23.add ("group");
    group2.orientation = "column";
    group2.alignChildren = "left";
    var gr0 = group2.add ("group");
    gr0.add ("statictext", undefined, "Topic-page separator: ");
    var topic_sep = gr0.add ("dropdownlist", undefined, ["Space", "En-space", "Comma+space"]);
    topic_sep.minimumSize.width = 120;
    topic_sep.selection = 1;
    var csense = group2.add ("checkbox", undefined, "\u00A0Match topics case-sensitively");
    csense.value = true;
    var ranging = group2.add ("group");
    var range = ranging.add ("checkbox", undefined, "\u00A0Range pages");
    range.value = true;
    var ranging_sub = ranging.add ("group");
    ranging_sub.add ("statictext", undefined, "Use: ");
    var range_dash = ranging_sub.add ("dropdownlist", undefined, ["Hyphen", "En-dash"]);
    range_dash.minimumSize.width = 80;
    range_dash.selection = 1;
    ranging_sub.add ("statictext", undefined, "Tolerance:");
    var tolerance = ranging_sub.add ("dropdownlist", undefined, ["0","1","2","3","4","5","6","7","8","9","10"]);
    tolerance.minimumSize.width = 50;
    tolerance.selection = 0;
    var section_markers = group2.add ('checkbox', undefined, '\u00A0Add section headings');
    var mark = group2.add ("checkbox", undefined, "\u00A0Mark topics without page references");

    var group3 = group23.add ("group");
    group3.orientation = "column";
    group3.margins = [40,0,0,0];
    group3.alignChildren = "left";
    var includeLL = group3.add ("checkbox", undefined, "\u00A0Include locked layers");
    var includeHL = group3.add ("checkbox", undefined, "\u00A0Include hidden layers");
    var includeLS = group3.add ("checkbox", undefined, "\u00A0Include locked stories");
    var includeFN = group3.add ("checkbox", undefined, "\u00A0Include footnotes");

    var buttons = w.add ("group");
    buttons.alignment = "right";
    var ok_button = buttons.add ("button", undefined, "OK");
    var cancel_button = buttons.add ("button", undefined, "Cancel");

    range.onClick = function () {ranging_sub.enabled = this.value}

    // Restore the selections from the previous run in the dialog -----------------------------------------------------
    // A value that is missing from the history leaves the control at the
    // default it was given above.
    if (history.selected_styles.length > 0)
        previously_selected_styles (history.selected_styles.split ("£$"));
    clude.children[history.include == 1 ? 1 : 0].value = true;
    restore_dropdown (topic_sep, history.topic_separator);
    restore_checkbox (csense, history.case_sensitive);
    restore_checkbox (range, history.range_pages);
    restore_checkbox (mark, history.mark);
    restore_checkbox (section_markers, history.section_markers);
    restore_dropdown (range_dash, history.dash);
    restore_dropdown (tolerance, history.tolerance);
    restore_checkbox (includeLL, history.incLL);
    restore_checkbox (includeHL, history.incHL);
    restore_checkbox (includeLS, history.incLS);
    restore_checkbox (includeFN, history.incFN);
    list2.selection = 0;

    // Set dependencies
    clude.enabled = list1.items.length > 0;
    ranging_sub.enabled = range.value;

    // select the dropdown item with the given text, if there is one
    function restore_dropdown (list, text)
    {
        var item = (text == undefined) ? null : list.find (String (text));
        if (item != null) list.selection = item;
    }

    // set a checkbox only from a real true/false value
    function restore_checkbox (box, value)
    {
        if (value === true || value === false) box.value = value;
    }

    // move the styles selected in the previous run from list2 to list1
    function previously_selected_styles (array)
    {
        for (var i = 0; i < array.length; i++)
        {
            if (list2.find (array[i]) != null)
            {
                list1.add ("item", array[i]);
                list2.remove (array[i])
            }
        }
    } // previously_selected_styles
    // End restore settings --------------------------------------------------------------

    // enable/disable buttons depending on whether a list has any items
    function update_buttons ()
    {
        add_.enabled = add_all.enabled = list2.items.length > 0;
        remove_.enabled = remove_all.enabled = list1.items.length > 0;
    }
    update_buttons ();

    add_.onClick = function () {if (list2.selection != null) move_item (list2.selection, list2, list1); clude.enabled = list1.items.length > 0};
    add_all.onClick = function () {move_all (list2, list1)};
    remove_.onClick = function () {if (list1.selection != null) move_item (list1.selection, list1, list2); clude.enabled = list1.items.length > 0};
    remove_all.onClick = function () {move_all (list1, list2); clude.enabled = false};
    load_styles.onClick = function () {load_pstyles ()};
    sort_styles.onClick = function () {sort_listbox (list2)};
    // keep a selection in one list only
    list1.onChange = function () {var sel = list1.selection; list2.selection = null; list1.selection = sel};
    list2.onChange = function () {var sel = list2.selection; list1.selection = null; list2.selection = sel};

    // refill list2 with the styles read from the other document
    function load_pstyles ()
    {
        list2.removeAll();
        for (var i = 0; i < parstyles.length; i++)
            list2.add ("item", parstyles[i]);
        list2.selection = 0;
    }

    // move the selected items (to_add) from one list to the other
    function move_item (to_add, source, target)
    {
        var i;
        // Record the index of the (first) selected item so that we can replace the cursor
        var sel = source.selection[0].index;
        // note the item texts before either list is changed
        var texts = [];
        for (i = 0; i < to_add.length; i++)
            texts.push (to_add[i].text);
        for (i = 0; i < texts.length; i++)
            target.add ("item", texts[i]);
        for (i = 0; i < texts.length; i++)
            source.remove (texts[i]);
        sort_listbox (target);
        update_buttons ();
        // Replace the cursor
        if (source.items.length > 0)
        {
            if (sel >= source.items.length)
                sel = source.items.length-1;
            source.selection = sel;
        }
    } // move_item

    // move every item from one list to the other
    function move_all (source, target)
    {
        var to_sort = target.items.length > 0;
        for (var i = 0; i < source.items.length; i++)
            target.add ("item", source.items[i].text);
        source.removeAll ();
        if (to_sort == true)
            sort_listbox (target);
        update_buttons ();
    } // move_all

    // sort the items of a list box alphabetically, ignoring case
    function sort_listbox (list_box)
    {
        var array = list_to_stringarray (list_box);
        array = array.sort (nocase);
        list_box.removeAll ();
        for (var i = 0; i < array.length; i++)
            list_box.add ("item", array[i]);
    }

    // comparator: must return a negative number, zero or a positive number
    function nocase (a, b)
    {
        var x = a.toLowerCase(), y = b.toLowerCase();
        if (x < y) return -1;
        if (x > y) return 1;
        return 0;
    }

    // the item texts of a list box as an array of strings
    function list_to_stringarray (list)
    {
        var array = [];
        for (var i = 0; i < list.items.length; i++)
            array.push (list.items[i].text);
        return array;
    }

    // dropdown text -> the actual topic-page separator
    function tsep (s)
    {
        switch (s)
        {
            case "Space": return " ";
            case "En-space": return "\u2002";
            case "Comma+space": return ", ";
            default: return " ";
        }
    }

    // dropdown text -> the actual range dash
    function psep (s)
    {
        switch (s)
        {
            case "Hyphen": return "-";
            case "En-dash": return "\u2013";
            default: return "\u2013";
        }
    }

    if (w.show() == 1)
    {
        var sel_styles = "";
        if (list1.items.length > 0)
            sel_styles = "£$"+list_to_stringarray (list1).join ("£$")+"£$";
        var obj = {selected_styles: sel_styles,
            include: clude.children[0].value ? 0 : 1,
            case_sensitive: csense.value,
            topic_separator: topic_sep.selection.text,
            range_pages: range.value,
            section_markers: section_markers.value,
            mark: mark.value,
            dash: range_dash.selection.text,
            tolerance: tolerance.selection.text,
            incLL: includeLL.value,
            incHL: includeHL.value,
            incLS: includeLS.value,
            incFN: includeFN.value
            }
        // the history stores the dropdown TEXTS; the caller gets the characters
        write_history (history_file, obj);
        obj.topic_separator = tsep (obj.topic_separator);
        obj.dash = psep (obj.dash);
        w.close();
        return obj;
    }
    else
    {
        w.close();
        exit ();
    }
} // get_data
// Read the settings of the previous run.
//   f: path of the history file (as written by write_history)
// Returns a settings object. Every key has a default; values found in the
// history file override the defaults, so a missing, incomplete or unreadable
// file still yields a complete object.
function read_history (f)
{
    // default values in case there's no history file; the key names and
    // value types are those the settings dialog reads and writes
    var obj = {selected_styles: "",
        include: 0,
        case_sensitive: true,
        topic_separator: "En-space",
        range_pages: true,
        mark: true,
        dash: "En-dash",
        tolerance: "0",
        section_markers: true,
        incLL: true,
        incHL: false,
        incLS: true,
        incFN: true
        };
    f = File (f);
    if (f.exists && f.open ("r"))
    {
        var stored = null;
        // NOTE(review): the file holds toSource() output and is evaluated as
        // code. Acceptable for a file this script writes itself, but anyone
        // who can edit the file can run code here.
        try {stored = eval (f.read ())}
        catch (_) {stored = null} // corrupt history: fall back to the defaults
        finally {f.close ()}
        if (stored != null && typeof stored == "object")
        {
            for (var key in stored)
                if (stored.hasOwnProperty (key)) obj[key] = stored[key];
        }
    }
    return obj;
}
// Store the settings for the next run: writes obj.toSource () to the file
// with path 'f', replacing whatever the file held before.
function write_history (f, obj)
{
    var history = File (f);
    history.open ("w");
    history.write (obj.toSource ());
    history.close ();
}
// Return the 'path' of the running script's File object. When
// app.activeScript cannot be used, the file name carried by the resulting
// error is used instead.
function script_dir()
{
    var dir;
    try {dir = File (app.activeScript).path}
    catch (e) {dir = File (e.fileName).path}
    return dir;
}
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Copy link to clipboard
Copied
Hi,
Peter will probably answer soon.
For those who'd wonder how to do the same job using IndexMatic, here is a quick tutorial:

@+
Marc
Copy link to clipboard
Copied
If the script can capture 90% of the words correctly, we win... The problem is that authors need to tag index entries while they are writing the text. Scripting like this is only first aid, not a good workflow...
In the IndexMatic video example, what does "=> $0," mean in the query?
Copy link to clipboard
Copied
That script of mine you quote considers text up to the first comma or opening parenthesis, whichever comes first. So this line:
Strayhorn, Billy
causes the script to look for 'Strayhorn'. If you want to look for Strayhorns as well, add it:
Strayhorns, Billy
and all other case markers that a name can have. If all names can have an s added (or other case markers), you can expand your name with the following grep find/replace:
Find what: ^(.+?)(,.+)
Change to: $1$2\r$1s$2\r
which transforms your list to this format:
Strayhorn, Billy
Strayhorns, Billy
Stresemann, Gustav
Stresemanns, Gustav
Strickland, Edward
Stricklands, Edward
etc.
No need to bother with the first names, the script doesn't see them.
Then of course you have to merge entries, and if there are two or more Stricklands, merge them . . .
Copy link to clipboard
Copied
Hi foler,
From video example indexmatic what mean in query => $0, ?
• The syntax => represents the rewriting operator. It is described here: http://indiscripts.com/blog/public/scripts/en_IndexMatic2-Manual.pdf#page=15
• $0 represents the match (hence the lastname in that particular case.) In fact you could use the form « Strayhorn => Strayhorn, Billy » as well. The symbol $0 just makes the queries shorter in this example. The technique of using the $n symbols becomes useful in more advanced queries.
Note: if you have to merge the forms Strayhorn and Strayhorns (with a 's') in a single entry, then your query syntax will be:
/(Strayhorn)s?/ => $1, Billy
or, if you prefer the long form without $:
/Strayhorns?/ => Strayhorn, Billy
@+
Marc
Copy link to clipboard
Copied
> ... sometimes is only first name.
Hopefully you are mistaken here and you meant last name! If you are right, then a script would find all "Billy"'s in the entire document and index them as "Billy Strayhorn". For such blind indexing, it's already a problem that persons may be referred to only by last name, and there could be an "Edward Strickland" but also an "Alice Strickland" and a "Bob Strickland". If they are referred to by just "As Strickland points out...", only very carefully reading the text may tell you which one is which.
Bottom line: do not expect such an automatic index to be correct. A human needs to verify each entry, and also verify if someone is referred to on a next page without mentioning the actual name.
Get ready! An upgraded Adobe Community experience is coming in January.
Learn more