Copy link to clipboard
Copied
I have a task to complete the below requirement for Index part in a book. Please help me.
I have sequence of numbers like this,
Index1, 26, 35, 36, 37, 47
Index2, 65, 78, 79, 89, 90
I need to change like this
i.e., numbers that are in consecutive order need to be joined into a range with an en dash (ndash).
Copy link to clipboard
Copied
Thanks for the fun morning algorithm exercise...
I think this will do it for you.
Regards
Bob
// Collapses runs of consecutive page numbers into "first-last" strings.
//
// <ar>  Array of page numbers (Numbers or numeric Strings), expected
//       in ascending order; the array is not sorted or modified here.
//
// Returns a new array in which
//   - a number with no consecutive neighbour is copied as is,
//   - a run of exactly two consecutive numbers is kept as two
//     separate entries (no hyphen for a two-page run),
//   - a run of three or more becomes one "first-last" string.
// Ex. indexize([26, 35, 36, 37, 47]) => [26, "35-37", 47]
//
// Declared as a real function (the earlier version assigned to an
// undeclared identifier, creating an implicit global), and every
// comparison now parses both sides with an explicit radix so that
// string input such as "36" is not turned into "361" by `+ 1`.
function indexize( ar ) {
    var out = [];
    var i = 0;
    while ( i < ar.length ) {
        var last = parseInt( ar[ i ], 10 );
        var next = i + 1;
        // Extend the run while the following entry is exactly last + 1.
        while ( next < ar.length && parseInt( ar[ next ], 10 ) === last + 1 ) {
            last = parseInt( ar[ next ], 10 );
            next++;
        }
        var runLength = next - i;
        if ( runLength === 1 ) {
            out.push( ar[ i ] );
        } else if ( runLength === 2 ) {
            out.push( ar[ i ] );
            out.push( ar[ i + 1 ] );
        } else {
            out.push( ar[ i ] + "-" + ar[ next - 1 ] );
        }
        // Resume scanning right after the run that was just emitted.
        i = next;
    }
    return out;
}
// Demo driver: an ascending sample page list that contains single pages,
// two-page runs and longer runs, printed before and after indexize().
var a = [ 1, 2, 4, 5, 7, 9, 11, 12, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 30, 31, 33, 34, 35, 36, 40, 41, 43,44,45,47,48,50 ];
$.writeln( "Input: " + a );
// Breakpoint placed just before the call so indexize() can be stepped
// through when the script runs under a debugger.
debugger;
$.writeln( "Output: " + indexize( a ) );
Copy link to clipboard
Copied
Oh, and do you really want 65,78,89,90 to contain 89-90?
It doesn't make sense (to me) for a sequence of just 2 numbers to get a hyphen.
But if you do, make the "if (concat)" block look something like:
out.push( concat ? ( ar[ i ] + "-" + current ) : ar[ i ] );
Bob
Copy link to clipboard
Copied
Peter Kahrel has had his part of the fun years ago -- http://www.kahrel.plus.com/indesign/index_update.html
Copy link to clipboard
Copied
Thanks Jongaware and Bob
Really nice script, works nicely and thanks for your kind help
Sajeev
Copy link to clipboard
Copied
While we're throwing in versions, I recently rewrote that terrible code I did a few years ago and now use this:
// Condenses a comma-separated list of page numbers into ranges.
//
// <str>        String -- page numbers separated by "," or ", ",
//              expected in ascending order (no sorting is done here).
// <tolerance>  Number -- largest difference between two neighbouring
//              numbers that still continues a range: 1 joins strictly
//              consecutive pages, 2 also bridges one missing page, etc.
//
// Returns the condensed list as a string, entries separated by ", ".
// Ex. range_pages ("22, 23, 24, 26, 27, 28", 2) => "22-28"
//
// The array subscripts and line ends that the forum markup had
// swallowed are restored; the former second counter `n` always had the
// same value as `i`, so it has been dropped.
function range_pages (str, tolerance)
{
    var array = str.split (/,\x20?/);
    var temp = "";
    var range = false;
    for (var i = 0; i < array.length; i++)
    {
        temp += array[i];
        // The subtraction coerces the string entries to numbers. Past the
        // last entry it yields NaN, which fails the comparison and stops.
        while (array[i+1] - array[i] <= tolerance)
            {i++; range = true}
        if (range)
            temp += "-" + array[i] + ", ";
        else
            temp += ", ";
        range = false;
    }
    // Drop the separator left behind by the last entry.
    return temp.replace (/, ?$/, "");
}
"tolerance" allows for skipping missing numbers in ranges, useful if you're not a.+y retentive about 100% correct coverage. So range_pages ("22, 23, 24, 26, 27, 28", 2) returns "22-28".
Peter
Copy link to clipboard
Copied
Peter,
I tried to improve my algorithm using Array.splice, but my benchmark shows that it's definitely not a good solution. The splice method is dramatically sluggish!
Finally, your approach is indisputably the most effective among the solutions I tested. It obtained good results even with huge arrays. So I started over following your logic, with some small optimizations and extra options. The code is not very elegant but the performance seems better this way:
function formatRanges(numbers, separator, joiner, minWidth, tolerance)
//----------------------------------------------------------
// Formats an array of integers into an ordered sequence of
// single numbers and/or ranges. Returns the formatted string.
// The input array is copied before sorting and is left untouched.
//
// <numbers> Array of Numbers [required]
// The integers to format. Supports: empty array,
// unsorted array, duplicated elems, negative values.
//
// <separator> String [opt] -- Default value: ", ".
// A string inserted between each result.
// Any falsy value (including '') selects the default.
// Ex. formatRanges([4,1,3,8,9,6], " | ")
// => "1 | 3-4 | 6 | 8-9"
//
// <joiner> String [opt] -- Default value: "-".
// A string used to format a range.
// Any falsy value (including '') selects the default.
// Ex. formatRanges([4,1,3,8,9,6], ", ", "_")
// => "1, 3_4, 6, 8_9"
//
// <minWidth> Number [opt] -- Default value: 1.
// Minimum distance between the 1st and the last
// number in a range.
// Ex. formatRanges([1,2,4,5,6,8,9,10,11], '', '', 1)
// => "1-2, 4-6, 8-11"
// Ex. formatRanges([1,2,4,5,6,8,9,10,11], '', '', 2)
// => "1, 2, 4-6, 8-11"
// Ex. formatRanges([1,2,4,5,6,8,9,10,11], '', '', 3)
// => "1, 2, 4, 5, 6, 8-11"
//
// <tolerance> Number [opt] -- Default value: 0.
// Number of allowed missing numbers in a range,
// as suggested by Peter Kahrel (http://bit.ly/cABqIP)
// Ex. formatRanges([2,3,5,8,12,17,23], '', '', 1, 0)
// => "2-3, 5, 8, 12, 17, 23"
// Ex. formatRanges([2,3,5,8,12,17,23], '', '', 1, 1)
// => "2-5, 8, 12, 17, 23"
// Ex. formatRanges([2,3,5,8,12,17,23], '', '', 1, 2)
// => "2-8, 12, 17, 23"
{
    // Defaults
    separator = separator || ", ";
    joiner = joiner || "-";
    // `x !== ~~x` is true for anything that is not an integer Number.
    if( minWidth !== ~~minWidth || minWidth < 1 ) minWidth = 1;
    // From here on <tolerance> is the largest accepted step between two
    // neighbours, i.e. (allowed missing numbers + 1), hence the ++.
    if( tolerance !== ~~tolerance || ++tolerance < 1 ) tolerance = 1;

    // Init.
    var a = numbers.concat().sort(function(x,y){return x-y;}),
        sz = a.length,
        n = sz && a[0],     // current number (end of the pending range)
        d = sz || false,    // step to the next number; false = no more input
        i = 0,
        w = 0,              // how many numbers were pushed for the pending range
        t = 0,              // how many missing numbers the pending range bridges
        ret = [];

    // Loop
    while( d !== false )
        {
        // FIX: the step is measured against the next ELEMENT, a[i].
        // The published snippet had lost the subscript (a-n => NaN).
        if( 0 === (d=(++i<sz)?a[i]-n:false) )
            continue; // skip duplicates

        if( d && (d<=tolerance) )
            {
            // Still inside a range: stack the current number provisionally.
            ret.push(n);
            n += d;
            ++w;
            t += (d-1);
            continue;
            }

        if( w >= minWidth )
            {
            // Range is wide enough: replace its w provisional entries by
            // a single "first<joiner>last" entry. first = n - w - t.
            ret.length -= w;
            ret.push((n-w-t)+joiner+n);
            }
        else
            {
            // Too narrow: keep the provisional entries and add the last one.
            ret.push(n);
            }

        // When d is false (end of input) this adds 0 and the loop ends.
        n += d;
        w = t = 0;
        }

    return ret.join(separator);
}
@+
Marc
Copy link to clipboard
Copied
Very interesting -- thanks, Marc. I've also seen sluggish performance using Array.splice(). And now with the results you report it seems almost always more efficient to duplicate things into a temporary second array.
Peter
Copy link to clipboard
Copied
Very nice!
That is interesting about splice()...
Harbs
Copy link to clipboard
Copied
Is there a reason you are using global variables?
Harbs
Copy link to clipboard
Copied
Harbs. wrote:
Is there a reason you are using global variables?
I don't!
Copy link to clipboard
Copied
What are these?
sz = a.length,
n = sz && a[0],
d = sz || false,
i = 0, w = 0, t = 0,
Copy link to clipboard
Copied
Ah. Never mind. I did not notice that it was a comma at the end of the previous line...
Harbs
Copy link to clipboard
Copied
By the way, here is a snippet I constantly use to detect unintended global variables:
// Debug helper: collects the name of every enumerable property of the
// object it is invoked on and shows them in one alert, most recently
// enumerated name first. Called with the outer `this`.
(function () {
    var names = [];
    for (var key in this) {
        names.unshift(key);
    }
    var report = "Globals:\r\r" + names.join("\r");
    alert( report );
}).call(this);
I generally place it at the end of a script --in the global scope of course.
You can also use it in an empty script to study the impressive amount of automatic global variables created by the engine before anything happens!
Dirk Beker showed me another trick using the undocumented '$.summary()' method, which is very useful in debugging too.
Try this:
alert( $.summary() );
And I discovered these ones:
alert( $.list() );
alert( $.listLO() );
@+
Marc
Copy link to clipboard
Copied
Nice!
Harbs
Copy link to clipboard
Copied
Very useful, thanks.
Copy link to clipboard
Copied
I revisited this thing (again) and realised that it could be done more efficiently:
// Condenses an ascending array of page numbers into ranges.
//
// <array>  Array of Numbers, expected sorted ascending (not sorted here,
//          not modified).
// <obj>    Object [required] with two properties:
//          tolerance -- largest difference between two neighbours that
//                       still continues a range (1 = consecutive pages
//                       only; 0 = only repeated numbers are joined).
//          dash      -- string placed between the two ends of a range.
//
// Returns a new array: plain numbers for single pages and
// "first<dash>last" strings for ranges.
//
// The [i] subscripts swallowed by the forum markup are restored below.
function page_ranges (array, obj) {
    var temp = [];
    var range = false;
    for (var i = 0; i < array.length; i++) {
        temp.push (array[i]);
        // Past the last element the difference is NaN, which ends the loop.
        while (array[i+1] - array[i] <= obj.tolerance) {i++; range = true}
        if (range) temp[temp.length-1] += obj.dash + array[i];
        range = false;
    }
    return temp;
} // page_ranges
// Sample code: page_ranges ([1,2,3,4,7,8,9,15,17,21,22,23], {tolerance: 0, dash: "-"});
//   => the same twelve numbers (tolerance 0 joins nothing here)
// With {tolerance: 1, dash: "-"} => ["1-4", "7-9", 15, 17, "21-23"]
Peter
Copy link to clipboard
Copied
Very nice!
Is there a reason you'd want to use a tolerance of 0?
I like to make function interfaces as easy to use as possible.
This version makes the dash and tolerance optional:
// Condenses an ascending array of page numbers into ranges.
//
// <array>  Array of Numbers, expected sorted ascending (not sorted here,
//          not modified).
// <obj>    Object [opt] -- may carry:
//          tolerance -- largest difference between two neighbours that
//                       still continues a range. Default: 0, which joins
//                       only repeated numbers; use 1 for consecutive pages.
//          dash      -- string placed between the two ends of a range.
//                       Default: "-".
//
// Returns a new array: plain numbers for single pages and
// "first<dash>last" strings for ranges.
//
// The [i] subscripts swallowed by the forum markup are restored below.
function page_ranges (array, obj)
{
    obj = obj || {};
    var temp = [];
    var range = false;
    var tolerance = obj.tolerance || 0;
    var dash = obj.dash || "-";
    for (var i = 0; i < array.length; i++)
    {
        temp.push (array[i]);
        // Past the last element the difference is NaN, which ends the loop.
        while (array[i+1] - array[i] <= tolerance)
            {i++; range = true}
        if (range){
            temp[temp.length-1] += dash + array[i];
        }
        range = false;
    }
    return temp;
} // page_ranges
Harbs
Copy link to clipboard
Copied
> Is there a reason you'd want to use a tolerance of 0?
That's a left-over from the script that produced indexes directly (i.e. without InDesign's index feature, like Marc's Index Brutal), where "tolerance = 0" meant "don't span ranges". At first I thought that there's not much point anymore now that I've put an interface on it (http://tinyurl.com/25ydd4j), but I now interpret tolerance 0 as "not skipping anything". But maybe tolerance = 1 is better for that. Dunno.
> I like to make function interfaces as easy to use as possible.
> This version makes the dash and tolerance optional:
Yes, I've seen that in some of Marc's scripts too. It's a nice trick, but in this case there's always a span and a dash, so in this script optionality isn't relevant. And since in large indexes this function can be called hundreds of times, checking the options every time might slow things down.
Peter
Copy link to clipboard
Copied
pkahrel wrote:
...now that I've put an interface on it (http://tinyurl.com/25ydd4j)...
NICE!
Yes, I've seen that in some of Marc's scripts too. It's a nice trick, but in this case there's always a span and a dash, so in this script optionality isn't relevant. And since in large indexes this function can be called hundreds of times, checking the options every time might slow things down.
Peter
The amount of time it takes to check an undefined property is negligible.
I just did a test of one million checks and it took about 4.4 seconds in the ESTK and about 4 seconds in InDesign:
// Micro-benchmark: cost of testing an undefined property for truthiness.
// NOTE(review): `time` is never used and `endTime` is shown without any
// subtraction, so this presumably relies on $.hiresTimer restarting each
// time it is read -- confirm against the ExtendScript documentation.
var time = $.hiresTimer;
textUndefined();
var endTime = $.hiresTimer;
alert(endTime);
// Counts i down from 1000000 and, on each pass, evaluates `a.bla`
// (a property that does not exist on the empty object) as a condition.
// The empty if-body is intentional: only the check itself is measured.
function textUndefined(){
var a={};
var i=1000000;
while(--i>0){
if(a.bla){
}
}
}
Interestingly enough, if I changed that check to explicitly check for undefined it takes about 2.9 seconds in ESTK and 2.5 seconds in InDesign (apparently the type conversion to a boolean costs):
// Micro-benchmark: cost of comparing an undefined property with
// `undefined` explicitly (loose ==), for comparison with a plain
// truthiness check.
// NOTE(review): `time` is never used and `endTime` is shown without any
// subtraction, so this presumably relies on $.hiresTimer restarting each
// time it is read -- confirm against the ExtendScript documentation.
var time = $.hiresTimer;
textUndefined();
var endTime = $.hiresTimer;
alert(endTime);
// Counts i down from 1000000 and, on each pass, compares `a.bla`
// (a property that does not exist on the empty object) with undefined.
// The empty if-body is intentional: only the comparison is measured.
function textUndefined(){
var a={};
var i=1000000;
while(--i>0){
if(a.bla==undefined){
}
}
}
Harbs
Copy link to clipboard
Copied
Interesting comparisons -- thanks.
P.
Copy link to clipboard
Copied
@Peter & Harbs:
I suppose the page_ranges function takes only arrays of well prepared numbers: sorted and single standing doubles eliminated when using tolerance 0.
See the following tests I did:
//TESTS:
// Sample data is ascending but contains repeated numbers. The line
// breaks lost in the post are restored so that all three calls run
// (the 2nd and 3rd had been fused into the preceding comments).
var a = [1,1,10,10,11,11,11,11,11,14,15,16,222,222,223,289];
$.writeln("Tolerance: 0\t" + page_ranges (a, {tolerance: 0, dash: "-"}));
//Returns: 1-1,10-10,11-11,14,15,16,222-222,223,289
$.writeln("Tolerance: 1\t" + page_ranges (a, {tolerance: 1, dash: "-"}));
//Returns: 1-1,10-11,14-16,222-223,289
$.writeln("Tolerance: 3\t" + page_ranges (a, {tolerance: 3, dash: "-"}));
//Returns: 1-1,10-16,222-223,289

// Condenses an ascending array of page numbers into ranges.
//
// <array>  Array of Numbers, expected sorted ascending (not sorted here,
//          not modified). Repeated numbers are NOT removed: a repeated
//          number yields an entry such as "1-1".
// <obj>    Object [opt] -- may carry:
//          tolerance -- largest difference between two neighbours that
//                       still continues a range. Default: 0.
//          dash      -- string placed between the two ends of a range.
//                       Default: "-".
//
// Returns a new array: plain numbers for single pages and
// "first<dash>last" strings for ranges.
//
// The function header (which had been fused into the comment line
// above) and the [i] subscripts lost in the post are restored below.
function page_ranges (array, obj)
{
    obj = obj || {};
    var temp = [];
    var range = false;
    var tolerance = obj.tolerance || 0;
    var dash = obj.dash || "-";
    for (var i = 0; i < array.length; i++)
    {
        temp.push (array[i]);
        // Past the last element the difference is NaN, which ends the loop.
        while (array[i+1] - array[i] <= tolerance)
            {i++; range = true}
        if (range){
            temp[temp.length-1] += dash + array[i];
        }
        range = false;
    }
    return temp;
}
It seems array[0] should get special treatment when its content is duplicated and is not part of a range…
Uwe
Copy link to clipboard
Copied
Hi Laubender,
I don't get that issue with the version of formatRanges that I've posted above (#17).
The function supports "empty array, unsorted array, duplicated elems, negative values."
//TESTS:
// Sample data with repeated numbers, formatted with the default-looking
// separator and joiner, minWidth 1, and three tolerance values.
// Tolerance 0 and 1 give the same result here because the data contains
// no two neighbouring numbers that are exactly 2 apart.
var a = [1,1,10,10,11,11,11,11,11,14,15,16,222,222,223,289];
alert("Tolerance: 0\t" + formatRanges(a, ', ', '-', 1, 0));
// Returns: 1, 10-11, 14-16, 222-223, 289
alert("Tolerance: 1\t" + formatRanges(a, ', ', '-', 1, 1));
// Returns: 1, 10-11, 14-16, 222-223, 289
alert("Tolerance: 2\t" + formatRanges(a, ', ', '-', 1, 2));
// Returns: 1, 10-16, 222-223, 289
function formatRanges(numbers, separator, joiner, minWidth, tolerance)
//----------------------------------------------------------
// Formats an array of integers into an ordered sequence of
// single numbers and/or ranges. Returns the formatted string.
// The input array is copied before sorting and is left untouched.
//
// <numbers> Array of Numbers [required]
// The integers to format. Supports: empty array,
// unsorted array, duplicated elems, negative values.
//
// <separator> String [opt] -- Default value: ", ".
// A string inserted between each result.
// Any falsy value (including '') selects the default.
// Ex. formatRanges([4,1,3,8,9,6], " | ")
// => "1 | 3-4 | 6 | 8-9"
//
// <joiner> String [opt] -- Default value: "-".
// A string used to format a range.
// Any falsy value (including '') selects the default.
// Ex. formatRanges([4,1,3,8,9,6], ", ", "_")
// => "1, 3_4, 6, 8_9"
//
// <minWidth> Number [opt] -- Default value: 1.
// Minimum distance between the 1st and the last
// number in a range.
// Ex. formatRanges([1,2,4,5,6,8,9,10,11], '', '', 1)
// => "1-2, 4-6, 8-11"
// Ex. formatRanges([1,2,4,5,6,8,9,10,11], '', '', 2)
// => "1, 2, 4-6, 8-11"
// Ex. formatRanges([1,2,4,5,6,8,9,10,11], '', '', 3)
// => "1, 2, 4, 5, 6, 8-11"
//
// <tolerance> Number [opt] -- Default value: 0.
// Number of allowed missing numbers in a range,
// as suggested by Peter Kahrel (http://bit.ly/cABqIP)
// Ex. formatRanges([2,3,5,8,12,17,23], '', '', 1, 0)
// => "2-3, 5, 8, 12, 17, 23"
// Ex. formatRanges([2,3,5,8,12,17,23], '', '', 1, 1)
// => "2-5, 8, 12, 17, 23"
// Ex. formatRanges([2,3,5,8,12,17,23], '', '', 1, 2)
// => "2-8, 12, 17, 23"
{
    // Defaults
    separator = separator || ", ";
    joiner = joiner || "-";
    // `x !== ~~x` is true for anything that is not an integer Number.
    if( minWidth !== ~~minWidth || minWidth < 1 ) minWidth = 1;
    // From here on <tolerance> is the largest accepted step between two
    // neighbours, i.e. (allowed missing numbers + 1), hence the ++.
    if( tolerance !== ~~tolerance || ++tolerance < 1 ) tolerance = 1;

    // Init.
    var a = numbers.concat().sort(function(x,y){return x-y;}),
        sz = a.length,
        n = sz && a[0],     // current number (end of the pending range)
        d = sz || false,    // step to the next number; false = no more input
        i = 0,
        w = 0,              // how many numbers were pushed for the pending range
        t = 0,              // how many missing numbers the pending range bridges
        ret = [];

    // Loop
    while( d !== false )
        {
        // FIX: the step is measured against the next ELEMENT, a[i].
        // The published snippet had lost the subscript (a-n => NaN).
        if( 0 === (d=(++i<sz)?a[i]-n:false) )
            continue; // skip duplicates

        if( d && (d<=tolerance) )
            {
            // Still inside a range: stack the current number provisionally.
            ret.push(n);
            n += d;
            ++w;
            t += (d-1);
            continue;
            }

        if( w >= minWidth )
            {
            // Range is wide enough: replace its w provisional entries by
            // a single "first<joiner>last" entry. first = n - w - t.
            ret.length -= w;
            ret.push((n-w-t)+joiner+n);
            }
        else
            {
            // Too narrow: keep the provisional entries and add the last one.
            ret.push(n);
            }

        // When d is false (end of input) this adds 0 and the loop ends.
        n += d;
        w = t = 0;
        }

    return ret.join(separator);
}
@+
Marc
Copy link to clipboard
Copied
@Marc,
yes, I saw that as I tested your function as well. Thank you for your effort …
It's tough for me to understand exactly what's going on in your function, elegant as it is.
Uwe
Copy link to clipboard
Copied