Building an index from a word list (surname, first name)
Hi
I am in a difficult situation at work: I have a book of 600 InDesign (.indd) pages without any index tags. I have a list of names that need to be indexed in a simple way: the name followed by its page numbers.
List looks like:
Strayhorn, Billy
Stresemann, Gustav
Strickland, Edward
Strindberg, August
Strobel, Heinrich
Strode, Rosamund
Subotnick, Morton
Swingler, Randall
Szymanowski, Karol
The result needs to look like this:
Swingler, Randall 55, 65-66, 78, 90
I need a script that searches for the names in this word list and creates an index list. The search has to be reversed: "Strayhorn, Billy" is probably written as "Billy Strayhorn" in the book, and sometimes only the first name appears. In addition, the script needs to ignore any letters that are not explicitly part of the listed name (the book is in Croatian). For example, the word list contains "Strayhorn, Billy"; the script needs to find every occurrence of "Billy Strayhorn" on a page in any combination, such as "Billy(s) Strayhorn(s)", and write the page numbers next to the name.
I found this life-saving script, but it needs to be modified for the needs described above. Any help is welcome.
//DESCRIPTION: Index direct
// Peter Kahrel -- www.kahrel.plus.com
#target indesign;
#targetengine index_direct;
// Entry point. The script needs at least two open documents and a text
// selection (text frame or insertion point) in the word list; otherwise it
// stops with a message.
if (!(app.documents.length >= 2 && app.selection.length != 0 && app.selection[0].parentStory.constructor.name == "Story"))
{
    errorM ("Select a text frame or an insertion point\r(and open two or more documents).");
}
// Run against the front document and report any runtime error with its line number.
try
{
    index_independent (app.documents[0]);
}
catch (e)
{
    alert (e.message + "\r(line " + e.line + ")");
}
//=======================================================================
// Collect the user's settings for `doc` through the dialog (get_data) and
// hand them straight to create_index.
function index_independent (doc)
{
    create_index (get_data (doc));
}
// Build the index in place: every paragraph of the selected story is treated
// as one topic, searched for in all other open documents, and the page
// numbers found are appended to that paragraph.
//   obj: settings object returned by get_data (topic_separator, mark,
//        section_markers, plus the options read by the helpers called here).
// Writes the global `mess` (the progress text control), which make_topic and
// add_sections update while the script runs.
function create_index (obj)
{
    app.scriptPreferences.enableRedraw = false;
    var top_text, pages;
    check_list (app.activeDocument);
    grep_settings (obj); // grep_settings MUST follow check_list (check_list sets its own find/change options through set_grep)
    // get the topics from the concordance list as paragraph objects
    var tops = app.selection[0].parentStory.paragraphs;
    // get the names of all open documents (creates array of doc. names)
    var docs = app.documents.everyItem().name;
    // and delete current document (the concordance list) from the array (but it will stay open)
    docs.shift();
    // initialise message window; deliberately global, see header comment
    mess = createmessagewindow (40);
    try
    {
        for (var i = 0; i < tops.length; i++)
        {
            // create the search string from this topic paragraph
            top_text = make_topic (tops[i], obj);
            // get page numbers of the topic from all other open docs
            pages = get_pages (docs, top_text, obj);
            if (pages.length > 0)
            {
                // Append the page list to the topic. In the last paragraph it goes
                // at insertion point -1; in all others at -2, presumably so that it
                // lands before the paragraph's trailing return.
                if (i == tops.length-1)
                    tops[i].insertionPoints[-1].contents = obj.topic_separator + pages;
                else
                    tops[i].insertionPoints[-2].contents = obj.topic_separator + pages;
            }
            else
                // no hits: optionally flag the topic by striking it through
                if (obj.mark) tops[i].strikeThru = true;
        }
        if (obj.section_markers)
            add_sections (tops);
    }
    finally
    {
        // close the progress palette even when a search or an insertion fails
        mess.parent.close();
    }
}
// Insert a heading paragraph (a single upper-case character) wherever the
// first word character of a topic differs from that of the following topic,
// and one more heading before the very first topic.
//   par: the collection of topic paragraphs.
// Works from the end backwards so that inserted headings do not shift the
// paragraphs that still have to be compared.
function add_sections (par)
{
    mess.text = 'Adding sections...';
    app.findGrepPreferences = null;
    // lazy quantifier: every match is exactly one word character
    app.findGrepPreferences.findWhat = '\\w+?';
    var ch1, ch2;
    for (var i = par.length-2; i >= 0; i--)
    {
        try
        {
            ch1 = par[i].findGrep()[0].contents.toUpperCase();
            ch2 = par[i+1].findGrep()[0].contents.toUpperCase();
            if (ch1 != ch2)
                par[i].insertionPoints[-1].contents = ch2+'\r';
        }
        // best effort: a paragraph without any word character is simply skipped
        catch (_) {}
    }
    try {par[0].insertionPoints[0].contents = par[0].findGrep()[0].contents.toUpperCase()+'\r';} catch (_){}
}
// Turn one topic paragraph into a GREP search string.
//   t:   object whose `contents` is the topic text
//   obj: settings; only `case_sensitive` is read here
// Returns the pattern wrapped in \b...\b, prefixed with (?i) when the search
// is not case-sensitive. Also shows the topic in the global progress control.
function make_topic (t, obj)
{
    // topic text without its trailing return
    var label = t.contents.replace (/\r$/, "");
    // show the topic -- can't do that anywhere else
    mess.text = label;
    // drop everything from the first comma or "_(" onwards, then keep only
    // the last "__"-separated part (the subtopic, if there is one)
    var parts = label.replace (/(,|_\().+$/, "").split ("__");
    // whole-word-only search
    var pattern = "\\b" + parts[parts.length-1] + "\\b";
    return obj.case_sensitive == false ? "(?i)" + pattern : pattern;
}
// Collect the page names on which pattern `t` occurs in every document
// listed in `docs`.
//   docs: array of document names
//   t:    GREP pattern built by make_topic
//   obj:  settings object, passed on to get_one_doc and rerange
// Returns an array when there are zero or one hits, and the formatted string
// produced by rerange when there are more; callers only use `.length` and
// string concatenation, which work for both.
function get_pages (docs, t, obj)
{
    var pages = [];
    for (var i = 0; i < docs.length; i++)
    {
        // look the document up by name, one document per pass
        var temp = get_one_doc (app.documents.item (docs[i]), t, obj);
        if (temp.length > 0)
            pages = pages.concat (temp);
    }
    // sort and remove duplicates
    if (pages.length > 1)
        pages = rerange (pages, obj);
    return pages
}
// Search a single document for pattern `t` and return the names of the
// pages on which accepted matches were found (unsorted, duplicates kept).
//   doc: the document to search
//   t:   GREP pattern
//   obj: settings object, passed to check_style for paragraph-style filtering
function get_one_doc (doc, t, obj)
{
    var page, array = [];
    app.findGrepPreferences.findWhat = t;
    var pp = doc.findGrep();
    for (var i = 0; i < pp.length; i++)
    {
        // only count matches whose paragraph style passes the filter
        if (check_style (pp[i], obj))
        {
            page = find_page (pp[i]);
            // find_page gives null/undefined when no page can be determined
            if (page != null)
                array.push (page.name);
        }
    }
    return array
}
// Find the page that object `o` sits on by walking up through its parents.
// Returns o.parentPage when `o` has that property, `o` itself when it is a
// Page, null when anything throws on the way, and undefined when the parent
// is of a type not handled here.
function find_page (o)
{
    try
    {
        if (o.hasOwnProperty ("parentPage"))
            return o.parentPage;
        if (o.constructor.name == "Page")
            return o;
        var owner = o.parent.constructor.name;
        // containers that can simply be climbed
        if (owner == "Character" || owner == "Table" || owner == "TextFrame" || owner == "Group")
            return find_page (o.parent);
        // a cell is located through the frame holding its first text
        if (owner == "Cell")
            return find_page (o.parent.texts[0].parentTextFrames[0]);
        // text in a story: use the first frame that holds it
        if (owner == "Story")
            return find_page (o.parentTextFrames[0]);
        // footnote text: continue from the footnote's storyOffset
        if (owner == "Footnote")
            return find_page (o.parent.storyOffset);
        if (owner == "Page")
            return o.parent;
    }
    catch (_) {return null}
}
// Decide whether match `w` counts, judging by its paragraph style.
//   obj.selected_styles: "" (no filtering) or style names joined and wrapped with "£$"
//   obj.include: 0 = skip matches in the selected styles, 1 = accept only those
// Returns true when the match is accepted, undefined otherwise.
function check_style (w, obj)
{
    if (obj.selected_styles == "") return true;
    // any other mode value accepts nothing
    if (obj.include != 0 && obj.include != 1) return;
    var is_listed = obj.selected_styles.indexOf ("£$"+w.appliedParagraphStyle.name+"£$") > -1;
    var must_be_listed = obj.include == 1;
    // exclude mode wants an unlisted style, include mode a listed one
    if (is_listed == must_be_listed) return true;
}
// Tidy the word list before it is used: strip trailing white space and
// collapse runs of spaces. Both changes are applied to the whole of `doc`.
// Note that set_grep replaces the current find/change GREP preferences and
// options, so callers must set their own options afterwards.
function check_list (doc)
{
    set_grep ("\\s+$", "", {}); // remove trailing space
    doc.changeGrep();
    // collapse runs of spaces to ONE space; replacing them with nothing
    // would glue the neighbouring words together
    set_grep ("\\x20\\x20+", " ", {});
    doc.changeGrep();
}
// Open a small palette holding one static text control for progress messages.
//   le: width of the text control, in characters
// Returns the text control; set its `text` to show a message and call
// `parent.close()` on it to dismiss the palette.
function createmessagewindow( le )
{
    // locals, so nothing is left behind in the persistent script engine
    var dlg = new Window ('palette');
    dlg.alignChildren = ['left', 'top'];
    var txt = dlg.add ('statictext', undefined, "");
    txt.characters = le;
    dlg.show();
    return txt
}
//=================================================================
// Sort, remove duplicates, and range page numbers
// Turn a raw list of page names into the final page string: duplicates
// removed, roman and arabic numbers sorted (and ranged) separately, roman
// numbers first, everything joined with ", ".
//   pagenum_array: page names as collected from the documents
//   obj:           settings object, passed on to sort_range
function rerange (pagenum_array, obj)
{
    // de-duplicate, then split into {roman: [...], arabic: [...]}
    var groups = split_roman_arabic (remove_duplicates (pagenum_array));
    var arabic_part = sort_range (groups.arabic, obj).join (", ");
    var roman_part = groups.roman;
    if (roman_part.length > 1)
    {
        // roman numbers are sorted as arabic numbers and converted back,
        // which leaves a string
        var as_numbers = roman_to_arabic (roman_part);
        roman_part = arabic_to_roman (sort_range (as_numbers, obj).join (", "));
    }
    // roman_part is a string or a (0- or 1-element) array at this point;
    // += and + turn it into a string either way
    if (roman_part.length > 0 && arabic_part.length > 0)
        roman_part += ", ";
    return roman_part + arabic_part;
}
// Sort `array` numerically (in place) and, when obj.range_pages is true,
// combine neighbouring pages into ranges.
function sort_range (array, obj)
{
    var sorted = array.sort (sort_num);
    return obj.range_pages == true ? apply_page_ranges (sorted, obj) : sorted;
}
// Split page names into two arrays and return them as {roman: [...], arabic: [...]}.
// A name counts as arabic when it consists only of digits, hyphens and
// en-dashes; everything else ends up in `roman`. The order within each array
// follows the input.
function split_roman_arabic (array)
{
    var roman = [];
    var arab = [];
    var arabic_like = /^[-\u2013\d]+$/;
    for (var i = 0; i < array.length; i++)
    {
        // String() so that a numeric page name is classified too
        if (arabic_like.test (String (array[i])))
            arab.push (array[i]);
        else
            roman.push (array[i]);
    }
    return {roman: roman, arabic: arab}
}
//~ function arabic_to_roman (array)
//~ {
//~ for (var i = array.length-1; i > -1; i--)
//~ array = arabic2roman (array);
//~ return array
//~ }
// Replace every run of word characters in string `s` with whatever
// arabic2roman returns for it; separators such as ", " and range dashes are
// left untouched.
function arabic_to_roman (s)
{
    var number_token = /\w+/g;
    return s.replace (number_token, arabic2roman);
}
// Convert every roman numeral in `array` to its arabic value.
// The array is converted in place and also returned.
function roman_to_arabic (array)
{
    for (var i = array.length-1; i > -1; i--)
        array[i] = roman2arabic (array[i]);
    return array;
}
// Comparator for Array.sort: ascending numeric order. Numeric strings are
// coerced to numbers by the subtraction.
function sort_num (a, b)
{
    return a - b;
}
//~ function sort_roman (a, b) {return roman2arabic (a) - roman2arabic (b)}
// Convert an arabic number (number or digit string) below 10000 to a
// lower-case roman numeral. Thousands from 4000 up are written "4m", "5m", ...
// Returns "" for 0, for values of 10000 and above, and for input that is not
// made up of digits only.
function arabic2roman (arab)
{
    var roman = "";
    if (arab < 10000 && /^\d+$/.test (String (arab)))
    {
        // one row per decimal position: units, tens, hundreds, thousands
        var rom = [["","i","ii","iii","iv","v","vi","vii","viii","ix"],
            ["","x","xx","xxx","xl","l","lx","lxx","lxxx","xc"],
            ["","c","cc","ccc","cd","d","dc","dcc","dccc","cm"],
            ["","m","mm","mmm","4m","5m","6m","7m","8m","9m"]];
        // reverse the digits so that index i is also the decimal position
        arab = String (arab).split("").reverse().join("");
        for (var i = 0; i < arab.length; i++)
            roman = rom[i][arab.charAt(i)] + roman;
    }
    return roman
}
// Convert a roman numeral to its arabic value. Upper- and lower-case
// numerals are accepted. A string containing any character other than
// i, v, x, l, c, d, m yields NaN.
function roman2arabic (roman)
{
    var i;
    var rom2arab = {i: 1, v: 5, x: 10, l: 50, c: 100, d: 500, m: 1000};
    roman = String (roman).toLowerCase();
    // start with the value of the last character, then walk to the left
    var arabic = rom2arab [roman.charAt (roman.length-1)];
    for (i = roman.length-2; i > -1; i--)
    {
        // a smaller symbol in front of a larger one is subtracted (iv, xc, ...)
        if (rom2arab [roman.charAt (i)] < rom2arab [roman.charAt (i+1)])
            arabic -= rom2arab [roman.charAt (i)];
        else
            arabic += rom2arab [roman.charAt (i)];
    }
    return arabic
}
// Return a new array holding each distinct value of `array` once. Entries
// that are undefined or null are skipped. The input is walked from the end,
// so the result is in reverse order of last occurrence; callers sort it
// afterwards.
function remove_duplicates (array)
{
    var temp = [];
    // plain object used as a set; hasOwnProperty keeps inherited names such
    // as "constructor" from being mistaken for values already seen
    var seen = {};
    for (var i = array.length-1; i > -1; i--)
    {
        if (array[i] != undefined && !Object.prototype.hasOwnProperty.call (seen, array[i]))
        {
            seen[array[i]] = true;
            temp.push (array[i]);
        }
    }
    return temp
} // remove_duplicates
// Collapse runs of neighbouring page numbers into ranges.
//   array: page numbers in ascending order
//   obj.tolerance: allowed gap beyond consecutive pages; 0 joins only
//                  consecutive pages. May be a number or numeric text, as
//                  delivered by the settings dialog.
//   obj.dash: string placed between the first and last page of a range
// Returns a new array in which each run is a single "first<dash>last" string.
function apply_page_ranges (array, obj)
{
    // Number(): the dialog delivers the tolerance as text, and "1"+1 would be "11"
    var tolerance = Number (obj.tolerance) + 1;
    var temp = [];
    var range;
    for (var i = 0; i < array.length; i++)
    {
        temp.push (array[i]);
        range = false;
        // advance while the next page is close enough; past the end the
        // difference is NaN, which ends the loop
        while (array[i+1] - array[i] <= tolerance)
        {i++; range = true}
        if (range)
            temp[temp.length-1] += obj.dash + array[i];
    }
    return temp;
} // apply_page_ranges
// Undo ranging (and digit dropping): every "a-b" or "a–b" item of `array` is
// replaced by the individual numbers a..b. Returns a new array of strings.
function unrange (array)
{
    // restore the digits dropped from the end of a range: 123-6 > 123-126
    function restore_digits (first, last)
    {
        return first.slice (0, first.length - last.length) + last;
    }
    // replace callback: `first` and `last` are the two captured numbers
    function expand (whole, first, last)
    {
        if (first.length > last.length)
            last = restore_digits (first, last);
        var lo = +first, hi = +last;
        var list = [];
        for (var n = lo; n < hi; n++)
            list.push (n);
        // the end page is always present, even when it is below the start
        list.push (hi);
        return list.join (",");
    }
    return array.join (",").replace (/(\d+)[-\u2013](\d+)/g, expand).split (",");
} // unrange
// End rerange ============================================================
// Show message `m` in an alert box, then stop the script.
function errorM (m)
{
    alert (m);
    exit ();
}
// Reset the find/change GREP preferences and load a new search.
//   find:    GREP expression to look for
//   replace: replacement text
//   options: optional object. PS / CS restrict the search to a paragraph /
//            character style; FN, M, HL, LL and LS switch on footnotes, master
//            pages, hidden layers, locked layers and locked stories merely by
//            being defined (their values are not looked at).
function set_grep (find, replace, options)
{
    var opts = (options == undefined) ? {} : options;
    // clear what an earlier search may have left behind
    app.changeGrepPreferences = null;
    app.findGrepPreferences = null;
    app.findGrepPreferences.findWhat = find;
    app.changeGrepPreferences.changeTo = replace;
    if (opts.PS !== undefined)
        app.findGrepPreferences.appliedParagraphStyle = opts.PS;
    if (opts.CS !== undefined)
        app.findGrepPreferences.appliedCharacterStyle = opts.CS;
    var scope = {};
    scope.includeFootnotes = opts.FN !== undefined;
    scope.includeMasterPages = opts.M !== undefined;
    scope.includeHiddenLayers = opts.HL !== undefined;
    scope.includeLockedLayersForFind = opts.LL !== undefined;
    scope.includeLockedStoriesForFind = opts.LS !== undefined;
    app.findChangeGrepOptions.properties = scope;
}
// Reset the find/change GREP preferences and apply the search scope chosen
// in the dialog.
//   o: settings object; incFN, incHL, incLL and incLS are read.
function grep_settings (o)
{
    app.changeGrepPreferences = null;
    app.findGrepPreferences = null;
    var scope = {};
    scope.includeFootnotes = o.incFN;
    scope.includeHiddenLayers = o.incHL;
    scope.includeLockedLayersForFind = o.incLL;
    scope.includeLockedStoriesForFind = o.incLS;
    app.findChangeGrepOptions.properties = scope;
}
// End create_index ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Show the settings dialog and return the user's choices.
//   doc: the front document. Kept for interface compatibility; the dialog
//        itself only reads the paragraph styles of app.documents[1].
// Returns an object with the keys selected_styles, include, case_sensitive,
// topic_separator, range_pages, section_markers, mark, dash, tolerance,
// incLL, incHL, incLS and incFN. The choices are also saved to the history
// file so that the next run starts with them. Exits the script when the
// dialog is cancelled.
function get_data (doc)
{
    var history_file = script_dir() + "/index_direct.txt";
    var history = read_history (history_file);
    // get the styles in the other doc just in case we want to load them (can't do that after the dialog is displayed)
    var parstyles = app.documents[1].paragraphStyles.everyItem().name;
    // remove the first item ([No Paragraph])
    parstyles.shift();
    var w = new Window ("dialog", "Create independent index", undefined, {closeButton: false});
    w.alignChildren = "left";
    var panel = w.add ("panel", undefined, "Select paragraph styles");
    panel.orientation = "row";
    // list1 (left) holds the selected styles and starts empty;
    // list2 (right) holds the styles that are still available
    var list1 = panel.add ("listbox", undefined, undefined, {multiselect: true});
    var addbuttons = panel.add ("group");
    addbuttons.orientation = "column";
    addbuttons.alignChildren = "fill";
    var add_ = addbuttons.add ("button", undefined, "<---Add selected");
    var add_all = addbuttons.add ("button", undefined, "<---Add all");
    var remove_ = addbuttons.add ("button", undefined, "Remove selected --->");
    var remove_all = addbuttons.add ("button", undefined, "Remove all --->");
    var load_styles = addbuttons.add ("button", undefined, "Load styles");
    var sort_styles = addbuttons.add ("button", undefined, "Sort styles");
    var list2 = panel.add ("listbox", undefined, parstyles, {multiselect: true});
    list1.preferredSize = list2.preferredSize = [200, 200];
    var clude = w.add ("panel");
    clude.orientation = "row";
    clude.alignment = "fill";
    clude.alignChildren = "left";
    clude.add ("radiobutton", undefined, "\u00A0Exclude the selected paragraph styles");
    clude.add ("radiobutton", undefined, "\u00A0Include ONLY the selected paragraph styles");
    clude.children[0].value = true;
    var group23 = w.add ("group");
    group23.alignChildren = "top";
    var group2 = group23.add ("group");
    group2.orientation = "column";
    group2.alignChildren = "left";
    var gr0 = group2.add ("group");
    gr0.add ("statictext", undefined, "Topic-page separator: ");
    var topic_sep = gr0.add ("dropdownlist", undefined, ["Space", "En-space", "Comma+space"]);
    topic_sep.minimumSize.width = 120;
    topic_sep.selection = 1;
    var csense = group2.add ("checkbox", undefined, "\u00A0Match topics case-sensitively");
    csense.value = true;
    var ranging = group2.add ("group");
    var range = ranging.add ("checkbox", undefined, "\u00A0Range pages");
    range.value = true;
    var ranging_sub = ranging.add ("group");
    ranging_sub.add ("statictext", undefined, "Use: ");
    var range_dash = ranging_sub.add ("dropdownlist", undefined, ["Hyphen", "En-dash"]);
    range_dash.minimumSize.width = 80;
    range_dash.selection = 1;
    ranging_sub.add ("statictext", undefined, "Tolerance:")
    var tolerance = ranging_sub.add ("dropdownlist", undefined, ["0","1","2","3","4","5","6","7","8","9","10"]);
    tolerance.minimumSize.width = 50;
    tolerance.selection = 0;
    var section_markers = group2.add ('checkbox', undefined, '\u00A0Add section headings')
    var mark = group2.add ("checkbox", undefined, "\u00A0Mark topics without page references");
    var group3 = group23.add ("group");
    group3.orientation = "column";
    group3.margins = [40,0,0,0];
    group3.alignChildren = "left";
    var includeLL = group3.add ("checkbox", undefined, "\u00A0Include locked layers");
    var includeHL = group3.add ("checkbox", undefined, "\u00A0Include hidden layers");
    var includeLS = group3.add ("checkbox", undefined, "\u00A0Include locked stories");
    var includeFN = group3.add ("checkbox", undefined, "\u00A0Include footnotes");
    var buttons = w.add ("group");
    buttons.alignment = "right";
    var ok_button = buttons.add ("button", undefined, "OK");
    var cancel_button = buttons.add ("button", undefined, "Cancel");
    range.onClick = function () {ranging_sub.enabled = this.value}
    // Restore the selections from the previous run in the dialog -----------------------------------------------------
    if (history.selected_styles.length > 0)
        previously_selected_styles (history.selected_styles.split ("£$"));
    clude.children[history.include].value = true;
    topic_sep.selection = topic_sep.find (history.topic_separator);
    csense.value = history.case_sensitive;
    range.value = history.range_pages;
    mark.value = history.mark;
    section_markers.value = history.section_markers;
    range_dash.selection = range_dash.find (history.dash);
    tolerance.selection = tolerance.find (history.tolerance);
    includeLL.value = history.incLL;
    includeHL.value = history.incHL;
    includeLS.value = history.incLS;
    includeFN.value = history.incFN;
    list2.selection = 0;
    // Set dependencies
    clude.enabled = list1.items.length > 0;
    ranging_sub.enabled = range.value;
    // Move every remembered style that still exists in list2 over to list1.
    function previously_selected_styles (array)
    {
        for (var i = 0; i < array.length; i++)
        {
            if (list2.find (array[i]) != null)
            {
                list1.add ("item", array[i]);
                list2.remove (array[i])
            }
        }
    } // previously_selected_styles
    // End restore settings --------------------------------------------------------------
    // enable/disable buttons depending on whether a list has any items
    add_.enabled = add_all.enabled = list2.items.length;
    remove_.enabled = remove_all.enabled = list1.items.length;
    add_.onClick = function () {if (list2.selection != null) move_item (list2.selection, list2, list1); clude.enabled = list1.items.length > 0};
    add_all.onClick = function () {move_all (list2, list1)};
    remove_.onClick = function () {if (list1.selection != null) move_item (list1.selection, list1, list2); clude.enabled = list1.items.length > 0};
    remove_all.onClick = function () {move_all (list1, list2); clude.enabled = false};
    load_styles.onClick = function () {load_pstyles ()};
    sort_styles.onClick = function () {sort_listbox (list2)};
    // selecting in one list clears the selection in the other
    list1.onChange = function () {var sel = list1.selection; list2.selection = null; list1.selection = sel};
    list2.onChange = function () {var sel = list2.selection; list1.selection = null; list2.selection = sel};
    // Refill list2 with the paragraph styles read before the dialog was shown.
    function load_pstyles ()
    {
        //~ app.documents[0].importStyles(ImportFormat.paragraphStylesFormat, app.documents[1].fullName, GlobalClashResolutionStrategy.doNotLoadTheStyle);
        //~ var temp = doc.paragraphStyles.everyItem().name;
        //~ temp.shift ();
        list2.removeAll();
        for (var i = 0; i < parstyles.length; i++)
            list2.add ("item", parstyles[i]);
        list2.selection = 0;
    }
    // Move the items in `to_add` (a list selection) from `source` to `target`.
    function move_item (to_add, source, target)
    {
        // Record the index of the (first) selected item so that we can replace the cursor
        var sel = source.selection[0].index;
        for (var i = 0; i < to_add.length; i++)
            target.add ("item", to_add[i].text);
        for (var i = 0; i < to_add.length; i++)
            source.remove (to_add[i].text);
        sort_listbox (target);
        add_.enabled = add_all.enabled = list2.items.length;
        remove_.enabled = remove_all.enabled = list1.items.length;
        // Replace the cursor
        if (source.items.length > 0)
        {
            if (sel >= source.items.length)
                sel = source.items.length-1;
            source.selection = sel;
        }
    } // move_item
    // Move every item of `source` to `target`; the target is re-sorted only
    // when it already held items.
    function move_all (source, target)
    {
        var to_sort = target.items.length > 0;
        for (var i = 0; i < source.items.length; i++)
            target.add ("item", source.items[i].text);
        source.removeAll ();
        if (to_sort == true)
            sort_listbox (target);
        add_.enabled = add_all.enabled = list2.items.length;
        remove_.enabled = remove_all.enabled = list1.items.length;
    } // move_all
    // Sort the items of a listbox alphabetically, ignoring case.
    function sort_listbox (list_box)
    {
        var array = list_to_stringarray (list_box);
        array = array.sort (nocase);
        list_box.removeAll ();
        for (var i = 0; i < array.length; i++)
            list_box.add ("item", array[i]);
    }
    // Case-insensitive comparator; returns -1/0/1 as Array.sort expects.
    function nocase (a, b)
    {
        var x = a.toLowerCase(), y = b.toLowerCase();
        return x < y ? -1 : (x > y ? 1 : 0);
    }
    // Return the item texts of a listbox as an array of strings.
    function list_to_stringarray (list)
    {
        var array = [];
        for (var i = 0; i < list.items.length; i++)
            array.push (list.items[i].text);
        return array;
    }
    // Translate the separator name shown in the dialog into the actual character(s).
    function tsep (s)
    {
        switch (s)
        {
            case "Space": return " ";
            case "En-space": return "\u2002";
            case "Comma+space": return ", ";
            default: return " ";
        }
    }
    // Translate the dash name shown in the dialog into the actual character.
    function psep (s)
    {
        switch (s)
        {
            case "Hyphen": return "-";
            case "En-dash": return "\u2013";
            default: return "\u2013";
        }
    }
    //~ cancel_button.onClick = function () {w.close(); exit ()};
    if (w.show() == 1)
    {
        if (list1.items.length == 0)
            var sel_styles = "";
        else
            var sel_styles = "£$"+list_to_stringarray (list1).join ("£$")+"£$";
        var obj = {selected_styles: sel_styles,
            include: clude.children[0].value ? 0 : 1,
            case_sensitive: csense.value,
            topic_separator: topic_sep.selection.text,
            range_pages: range.value,
            section_markers: section_markers.value,
            mark: mark.value,
            dash: range_dash.selection.text,
            tolerance: tolerance.selection.text,
            incLL: includeLL.value,
            incHL: includeHL.value,
            incLS: includeLS.value,
            incFN: includeFN.value
        }
        // the history keeps the names shown in the dialog; the returned
        // object gets the real separator and dash characters
        write_history (history_file, obj);
        obj.topic_separator = tsep (obj.topic_separator);
        obj.dash = psep (obj.dash);
        w.close();
        return obj;
    }
    else
    {
        w.close();
        exit ();
    }
} // get_data
// Read the dialog settings saved by the previous run.
//   f: path of the history file
// Returns the default settings, overridden by whatever the file provides.
// A file that is missing, cannot be opened or cannot be parsed leaves the
// defaults in place.
function read_history (f)
{
    // default values in case there's no history file; the keys and value
    // types match what the settings dialog writes and reads back
    var obj = {selected_styles: "",
        include: 0,
        case_sensitive: true,
        topic_separator: "En-space",
        range_pages: true,
        mark: true,
        dash: "En-dash",
        tolerance: "0",
        section_markers: true,
        incLL: true,
        incHL: false,
        incLS: true,
        incFN: true
    };
    f = File (f);
    if (f.exists && f.open ("r"))
    {
        var stored = null;
        try
        {
            // NOTE(review): eval executes whatever the history file contains.
            // The file is expected to hold the toSource() output written by
            // write_history; do not point this at a file from an untrusted source.
            stored = eval (f.read ());
        }
        catch (_)
        {
            // unreadable or damaged history: keep the defaults
            stored = null;
        }
        finally
        {
            f.close ();
        }
        if (stored)
        {
            // copy key by key so that settings missing from an older file keep their defaults
            for (var key in stored)
                if (stored.hasOwnProperty (key))
                    obj[key] = stored[key];
        }
    }
    return obj;
}
// Save the settings object to the history file.
//   f:   path of the history file
//   obj: settings; stored in the text form produced by obj.toSource()
function write_history (f, obj)
{
    var out = File (f);
    out.open ("w");
    var source_text = obj.toSource ();
    out.write (source_text);
    out.close ();
}
// Return the folder of the running script. When app.activeScript cannot be
// read, the file name carried by the thrown error is used instead.
function script_dir()
{
    var dir;
    try
    {
        dir = File (app.activeScript).path;
    }
    catch (e)
    {
        dir = File (e.fileName).path;
    }
    return dir;
}
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++