Copy link to clipboard
Copied
When we write literary novels, it is common for many words to be repeated, but this is a mistake. So we have to correct this by changing them to synonyms or even rewriting the sentence. In InDesign, I need to generate a file that counts the occurrences of each word in each chapter. I am using the "book" system, so each chapter is a document, and I want to generate these files containing the list of word occurrences and export them to Excel, where I can view the words that appear most often in the text and work to fix my novel. I got a script, very good by the way, but it always saves to a single file name; for example, it always saves to "capitulo.txt", and when I run it again, it overwrites the previous word list because the file name stays the same. So, to make it easier, I would like it to add "01" or "02" to the end of the saved file name each time I run it, so it would be something like "capitulo01.txt", "capitulo02.txt". I don't know if I made myself understood.
Copy link to clipboard
Copied
Hi @xstranhox55805267, I think that probably won't be difficult for someone here. Are you able to post the script?
- Mark
Copy link to clipboard
Copied
Maybe it matters not only how often the repetitions happen but also how near they are to each other.
Copy link to clipboard
Copied
Ideally it should be a couple of line changes in your existing script. If you can't share the whole script, share the portion where it creates the output file: search for things like "File" or the filename (i.e. "capitulo.txt") and post some lines around it here. We should be able to make the changes.
-Manan
Copy link to clipboard
Copied
A good filename might be "<document name>_wordcount.txt". This way each file would be automatically unique and it would be very clear which document it had counted.
- Mark
Copy link to clipboard
Copied
abaixo segue o script original.
// Entry point: build the word-usage report for the active document and
// save it as wordusage.txt on the desktop.
var path = Folder.desktop + "/wordusage.txt";
writeText(path, wordFrequency());
/**
* Builds a tab-separated report of how often each word returned by
* getAllWords() occurs.
*
* The report starts with the header line "Document Word Usage: " and then
* lists one "word<TAB>count" line per distinct word, in the order in which
* each word first appears.
*
* Words are tallied in a single pass through a lookup table instead of
* rescanning the whole word list for every distinct word, so the cost grows
* linearly with the number of words rather than quadratically.
*
* @returns {String} the header followed by one line per distinct word
*/
function wordFrequency() {
    var aw = getAllWords();
    var counts = {};
    var order = [];
    var i, key;

    for (i = 0; i < aw.length; i++) {
        // Prefix the key so words such as "constructor" or "__proto__"
        // can never collide with properties inherited from Object.prototype.
        key = "$" + aw[i];
        if (Object.prototype.hasOwnProperty.call(counts, key)) {
            counts[key]++;
        } else {
            counts[key] = 1;
            // Remember first-seen order so the report layout is stable.
            order.push(aw[i]);
        }
    }

    var lines = ["Document Word Usage: \n"];
    for (i = 0; i < order.length; i++) {
        lines.push(order[i] + "\t" + counts["$" + order[i]].toString() + "\n");
    }
    return lines.join("");
}
/**
* Collects the contents of every word of every story in the active document.
*
* Stories are visited in collection order and, within each story, words are
* visited in order, so the result preserves that traversal order.
*
* @returns {Array} the `contents` value of each word, one entry per word
*/
function getAllWords() {
    var stories = app.activeDocument.stories;
    var collected = [];
    var storyIdx = 0;
    while (storyIdx < stories.length) {
        var storyWords = stories[storyIdx].words;
        var wordIdx = 0;
        while (wordIdx < storyWords.length) {
            collected.push(storyWords[wordIdx].contents);
            wordIdx += 1;
        }
        storyIdx += 1;
    }
    return collected;
}
/**
* Tells whether an array contains a given item, compared with strict
* equality (===).
*
* @param {Array} a the array to search
* @param {*} obj the item to look for
* @returns {Boolean} true if some element of `a` is strictly equal to `obj`
*/
function checkItem(a, obj) {
    var found = false;
    var pos = 0;
    // Stop scanning as soon as a match has been seen.
    while (!found && pos < a.length) {
        found = (a[pos] === obj);
        pos += 1;
    }
    return found;
}
/**
* Writes a string to a text file as UTF-8, replacing any existing content.
*
* The result of File.open() is checked, so a file that cannot be opened
* (locked, read-only, invalid path) is reported to the caller instead of
* being silently ignored. The file is closed even if writing throws.
*
* @param {String} p the path of the file to write
* @param {String} s the text to write
* @returns {Boolean} the result of File.write(), or false when the file
*                    could not be opened for writing
*/
function writeText(p, s) {
    var file = new File(p);
    file.encoding = 'UTF-8';
    if (!file.open('w')) {
        return false;
    }
    var written;
    try {
        written = file.write(s);
    } finally {
        // Always release the file handle, even when write() fails.
        file.close();
    }
    return written;
}
Copy link to clipboard
Copied
I see that the script produces a file on the desktop with the name wordusage.txt. With the script given below, it will now create files with the names wordusage.txt, wordusage_1.txt, wordusage_2.txt, etc., so earlier results are no longer overwritten.
// Entry point: build the word-usage report for the active document and
// save it under a not-yet-used file name on the desktop.
writeText(wordFrequency());
/**
* Produces the word-usage report for the words returned by getAllWords().
*
* Output format: the header line "Document Word Usage: " followed by one
* "word<TAB>count" line per distinct word, ordered by first appearance.
*
* Counting is done in one pass with a lookup table keyed by word, which
* avoids the quadratic cost of rescanning the word list for every distinct
* word.
*
* @returns {String} the complete report text
*/
function wordFrequency() {
    var aw = getAllWords();
    var tally = {};
    var firstSeen = [];
    var hasOwn = Object.prototype.hasOwnProperty;

    for (var i = 0; i < aw.length; i++) {
        var cword = aw[i];
        // The "$" prefix keeps words like "constructor" or "__proto__"
        // from clashing with properties inherited by the lookup table.
        var slot = "$" + cword;
        if (hasOwn.call(tally, slot)) {
            tally[slot] += 1;
        } else {
            tally[slot] = 1;
            firstSeen.push(cword);
        }
    }

    var wList = "Document Word Usage: \n";
    for (var j = 0; j < firstSeen.length; j++) {
        wList = wList + firstSeen[j] + "\t" + tally["$" + firstSeen[j]].toString() + "\n";
    }
    return wList;
}
/**
* Gathers the contents of each word found in the stories of the active
* document, story by story and word by word.
*
* @returns {Array} one entry per word, holding that word's `contents`
*/
function getAllWords() {
    var result = [];
    var docStories = app.activeDocument.stories;
    var storyNo, wordNo, wordsOfStory;

    for (storyNo = 0; storyNo < docStories.length; storyNo += 1) {
        wordsOfStory = docStories[storyNo].words;
        for (wordNo = 0; wordNo < wordsOfStory.length; wordNo += 1) {
            result.push(wordsOfStory[wordNo].contents);
        }
    }
    return result;
}
/**
* Membership test: reports whether `obj` occurs in `a`, using strict
* equality (===) for the comparison.
*
* @param {Array} a the array to check
* @param {*} obj the item to look for
* @returns {Boolean} true when the item is in the array, false otherwise
*/
function checkItem(a, obj) {
    var hit = false;
    // The loop condition ends the scan once a match has been recorded.
    for (var k = 0; k < a.length && !hit; k += 1) {
        hit = a[k] === obj;
    }
    return hit;
}
/**
* Writes text to a new UTF-8 file on the desktop without overwriting
* earlier output.
*
* The first candidate name is "<fileName>.txt"; if that file already exists,
* "<fileName>_1.txt", "<fileName>_2.txt", ... are tried in turn until an
* unused name is found.
*
* The result of File.open() is checked, so a file that cannot be opened is
* reported to the caller instead of being silently ignored. The file is
* closed even if writing throws.
*
* @param {String} s the text to write
* @param {String} [fileName] base name of the output file, without the
*                 extension; defaults to "wordusage" when omitted or empty
* @returns {Boolean} the result of File.write(), or false when the file
*                    could not be opened for writing
*/
function writeText(s, fileName) {
    var baseName = (fileName === undefined || fileName === null || fileName === "")
        ? "wordusage"
        : fileName;
    var prefix = Folder.desktop + "/" + baseName;
    var count = 1;
    var file = new File(prefix + ".txt");
    // Probe numbered names until one is free.
    while (file.exists) {
        file = new File(prefix + "_" + count + ".txt");
        count++;
    }
    file.encoding = 'UTF-8';
    if (!file.open('w')) {
        return false;
    }
    var written;
    try {
        written = file.write(s);
    } finally {
        // Always release the file handle, even when write() fails.
        file.close();
    }
    return written;
}
-Manan
Copy link to clipboard
Copied
Not sure this is relevant in your project but the script IndexMatic³ provides a Hits feature. The free version can report the 50 most repeated words, sorted by decreasing frequencies, and export results in TXT or CSV. You can also decide to exclude or include page numbers:
→ https://indiscripts.com/category/projects/IndexMatic
Best,
Marc