import { keepContentChar, trimMultipleSpace } from "../common/textutils";

// Words-per-minute rate that wordCount() divides the word total by to get
// `readingTime` (rounded up with Math.ceil).
export const READING_SPEED_WPM = 200;
// Words-per-minute rate that wordCount() divides the word total by to get
// `speakingTime` (rounded up with Math.ceil).
export const SPEAKING_SPEED_WPM = 130;
// Number of ranked entries wordCount() puts in `wordDensity` before it stops
// at the next change in count; entries tied with the last kept one still pass.
const MAXIMUM_WORD_DENSITY_DISPLAY = 10;

/**
 * Tokenizes `text` and reports word statistics plus a ranked word-density list.
 *
 * Each token returned by the tokenizer has its spaces removed and is passed
 * through `keepContentChar`; tokens that end up empty are discarded.
 *
 * The density ranking is ordered by descending count. It holds at least
 * `MAXIMUM_WORD_DENSITY_DISPLAY` entries when enough words exist, and keeps
 * going past that limit while the following words are tied with the last
 * entry that was kept. Stop words are left out of the ranking but still count
 * towards `wordCount`, `words` and the density denominator.
 *
 * @param {string} text - Raw input text; its `length` is reported as `charCount`.
 * @param {{ tokenize: function(string): string[] }} tokenizer - Object whose
 *   `tokenize` method splits the whitespace-normalized text into tokens.
 * @param {?{ includes: function(string): boolean }} [stopWords] - Optional
 *   collection (e.g. an array of strings) of words to omit from `wordDensity`.
 *   `null`/`undefined` disables stop-word filtering.
 * @returns {{
 *   wordCount: number,
 *   charCount: number,
 *   readingTime: number,
 *   speakingTime: number,
 *   words: string[],
 *   wordDensity: Array<{ index: number, word: string, count: number, density: string }>
 * }} Statistics for the text. `density` is a percentage of all words,
 *   formatted with one decimal place. `readingTime` and `speakingTime` are the
 *   word total divided by the respective WPM constant, rounded up.
 */
export function wordCount(text, tokenizer, stopWords) {
  const cleanContent = trimMultipleSpace(text);
  const tokenizedWordList = tokenizer
    .tokenize(cleanContent)
    .map((token) => keepContentChar(token.replaceAll(" ", "")))
    .filter((token) => token.length !== 0);
  const totalWords = tokenizedWordList.length;

  // A null-prototype dictionary is required here: with a plain `{}`, tokens
  // such as "constructor", "toString" or "__proto__" would collide with
  // members inherited from Object.prototype and produce garbage counts.
  const occurrences = Object.create(null);
  for (const token of tokenizedWordList) {
    occurrences[token] = (occurrences[token] ?? 0) + 1;
  }

  // Array.prototype.sort is stable, so words with equal counts keep the
  // dictionary's enumeration order.
  const rankedWords = Object.entries(occurrences).sort(
    ([, countA], [, countB]) => countB - countA,
  );

  const wordDensityList = [];
  let latestWordCount;
  for (const [word, count] of rankedWords) {
    const rank = wordDensityList.length + 1;

    // Past the display limit, only words tied with the last kept entry are
    // still eligible; the first lower count ends the ranking.
    if (rank > MAXIMUM_WORD_DENSITY_DISPLAY && latestWordCount !== count) {
      break;
    }

    if (stopWords?.includes(word)) {
      continue;
    }

    wordDensityList.push({
      index: rank,
      word,
      count,
      density: ((count / totalWords) * 100).toFixed(1),
    });
    latestWordCount = count;
  }

  return {
    wordCount: totalWords,
    charCount: text.length,
    readingTime: Math.ceil(totalWords / READING_SPEED_WPM),
    speakingTime: Math.ceil(totalWords / SPEAKING_SPEED_WPM),
    words: tokenizedWordList,
    wordDensity: wordDensityList,
  };
}
