const xpath = require('xpath');
import * as Collections from 'typescript-collections';
import { DOMParser } from 'xmldom';
const syllable = require('syllable');
import { countWords, countDistinctWords, countDistinctLemmes, getTime, getWords } from '../utils';
import adverbs from './adverbs';
import etreConjug from './etreConjug';
import participePasse from './participePasse';
import preposition from './preposition';
import verbesTernes from './verbesTernes';
const Snowball = require('snowball');
import NlpjsTFr from '../nlp-js-tools-french/index';
const sanitizeHtml = require('sanitize-html');
import stopWords from './stopWords';

// Thresholds used by the exported *Warning helpers of this module: each helper
// returns true when its count is strictly greater than the matching limit.
// Limit on too-long sentences (0: a single one is enough to warn).
const MAX_LONG_SENTENCES = 0;
// Limit on too-long paragraphs.
const MAX_LONG_PARAGRAPHS = 5;
// Limit on adverb matches.
const MAX_ADVERBS = 5;
// Limit on repeated stems, summed over all paragraphs.
const MAX_REPETITIONS = 5;
// Limit on punctuation spacing errors (0: a single one is enough to warn).
const MAX_PUNCTUATION_ERRORS = 0;
// Limit on passive-construction matches.
const MAX_PASSIVE = 5;
// Limit on negative-construction matches.
const MAX_NEGATIVE = 5;
// Limit on "verbes ternes" matches.
const MAX_VERBES_TERNES = 5;

// Regular-expression sources (strings, not RegExp objects) describing punctuation
// spacing errors. countPunctuationErrors joins both with '|' into one global RegExp.
//
// punctationMissingSpaceRegex: alternatives for `. , ; ? ! ( ) : « »` that are not
// followed and/or preceded by one of the characters accepted in the adjacent
// bracket expression (space, non-breaking space \u00a0 and, for some marks, a
// newline, a closing parenthesis or another punctuation mark).
// Note: inside [...] the `|` is a literal character, not an alternation, so a pipe
// next to a punctuation mark is accepted as well.
export const punctationMissingSpaceRegex = '(\\.[^ |\\)|\\.|\\u00a0|\\n])|(,[^ |\\u00a0])|(;[^ |\\)|\\u00a0|\\n])|([^ |\\u00a0];)|(\\?[^ |\\)|\\u00a0|\\n])|([^ |\\u00a0]\\?)|(\\![^ |\\)|\\u00a0|\\n])|([^ |\\u00a0]\\!)|([^ |\\u00a0]\\()|(\\)[^ |\\u00a0|\.|\,|\\!|\\?|\\n])|(:[^ |\\u00a0|\\n])|([^ |\\u00a0]:)|(«[^ |\\u00a0])|([^ |\\u00a0]«)|(»[^ |\\u00a0|\\n])|([^ |\\u00a0]»)';
// punctationExtraSpaceRegex: a space, non-breaking space (or literal `|`) placed
// right before `,`, `.` or `)`, or right after `(`.
export const punctationExtraSpaceRegex = '([ |\\u00a0],)|([ |\\u00a0]\\.)|([ |\\u00a0]\\))|(\\([ |\\u00a0])';

/* const spe = new SpellcheckErrors();
 */
/**
 * Extracts the text of every `<p>` element of an HTML fragment.
 *
 * The fragment is wrapped in a `<div>` before being parsed, and the text
 * contents of the paragraphs are joined with a blank line (`\n\n`).
 *
 * @param html HTML fragment to convert.
 * @returns The paragraph texts separated by `\n\n`.
 */
export function convertHtmlToText(html: string): string {
  const wrappedHtml = '<div>' + html + '</div>';
  const parsedDocument = new DOMParser().parseFromString(wrappedHtml);
  const paragraphTexts = xpath
    .select('//p', parsedDocument)
    .map((paragraphNode: any) => paragraphNode.textContent);
  return paragraphTexts.join('\n\n');
}

// Sentence
/**
 * Tells whether a sentence exceeds the accepted length.
 *
 * @param text The sentence to measure.
 * @returns true when countWords reports more than 40 words.
 */
export function isSentenceTooLong(text: string): boolean {
  const wordLimit = 40;
  const wordCount = countWords(text);
  return wordCount > wordLimit;
}

/**
 * Counts the sentences of a text that isSentenceTooLong flags.
 *
 * A sentence is a run of characters other than `.`, `!` and `?` followed by at
 * least one of those marks; trailing text without such a mark is not examined.
 *
 * @param text Text to scan.
 * @returns Number of too-long sentences.
 */
function countSentencesTooLong(text: string): number {
  const sentencePattern = /[^\.!\?]+[\.!\?]+/g;
  let tooLong = 0;
  for (let hit = sentencePattern.exec(text); hit !== null; hit = sentencePattern.exec(text)) {
    if (isSentenceTooLong(hit[0])) {
      tooLong += 1;
    }
  }
  return tooLong;
}

/**
 * Tells whether the text contains more too-long sentences than
 * MAX_LONG_SENTENCES allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function sentenceWarning(text: string) {
  const longSentences = countSentencesTooLong(text);
  return longSentences > MAX_LONG_SENTENCES;
}

/**
 * Computes the mean number of words per sentence.
 *
 * Sentences are runs of characters other than `.`, `!` and `?` closed by at
 * least one of those marks; each one is measured with countWords.
 *
 * @param text Text to analyse.
 * @returns The (unrounded) average, or 0 when no sentence is found.
 */
export function avgWordsPerSentence(text: string): number {
  const sentencePattern = /[^\.!\?]+[\.!\?]+/g;
  let wordTotal = 0;
  let sentenceTotal = 0;
  for (let hit = sentencePattern.exec(text); hit !== null; hit = sentencePattern.exec(text)) {
    wordTotal += countWords(hit[0]);
    sentenceTotal += 1;
  }
  if (sentenceTotal === 0) {
    return 0;
  }
  return wordTotal / sentenceTotal;
}

/**
 * Computes the mean number of words per sub-sentence.
 *
 * Works like avgWordsPerSentence, except that `;` also closes a segment.
 *
 * @param text Text to analyse.
 * @returns The (unrounded) average, or 0 when no segment is found.
 */
function avgWordsPerSubSentence(text: string): number {
  const segmentPattern = /[^\.!\?;]+[\.!\?;]+/g;
  let wordTotal = 0;
  let segmentTotal = 0;
  while (true) {
    const hit = segmentPattern.exec(text);
    if (hit === null) {
      break;
    }
    wordTotal += countWords(hit[0]);
    segmentTotal += 1;
  }
  return segmentTotal > 0 ? wordTotal / segmentTotal : 0;
}

// Paragraph
/**
 * Tells whether a paragraph exceeds the accepted length.
 *
 * @param text The paragraph to measure.
 * @returns true when countWords reports more than 100 words.
 */
export function isParagraphTooLong(text: string): boolean {
  const wordLimit = 100;
  const wordCount = countWords(text);
  return wordCount > wordLimit;
}

/**
 * Counts the paragraphs that isParagraphTooLong flags.
 *
 * Paragraphs are the pieces obtained by splitting the text on blank lines (`\n\n`).
 *
 * @param text Text to scan.
 * @returns Number of too-long paragraphs.
 */
function countParagraphsTooLong(text: string): number {
  return text
    .split('\n\n')
    .filter(paragraph => isParagraphTooLong(paragraph))
    .length;
}

/**
 * Tells whether the text contains more too-long paragraphs than
 * MAX_LONG_PARAGRAPHS allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function paragraphWarning(text: string) {
  const longParagraphs = countParagraphsTooLong(text);
  return longParagraphs > MAX_LONG_PARAGRAPHS;
}

// Adverbs
/**
 * Builds the global, case-insensitive expression matching any entry of the
 * `adverbs` list.
 *
 * The entry (capture group 1) must be preceded by the start of the text or by
 * characters outside `a-zA-Z0-9À-ÿ`, and followed by the end of the text or by
 * such characters. Those delimiters are part of the match.
 *
 * @returns A fresh RegExp (its own `lastIndex`) on every call.
 */
export function getAdverbsRegexp(): RegExp {
  const leadingDelimiter = '(?:^|[^a-zA-Z0-9À-ÿ]+)';
  const trailingDelimiter = '(?:$|[^a-zA-Z0-9À-ÿ]+)';
  const alternatives = adverbs.join('|');
  return new RegExp(leadingDelimiter + '(' + alternatives + ')' + trailingDelimiter, 'gi');
}

/**
 * Counts the non-overlapping matches of getAdverbsRegexp in a text.
 *
 * Each match consumes its trailing delimiter, so an adverb that immediately
 * follows another one (separated by that delimiter only) is not matched again.
 *
 * @param text Text to scan.
 * @returns Number of matches.
 */
function countAdverbs(text: string): number {
  const adverbPattern = getAdverbsRegexp();
  let total = 0;
  while (adverbPattern.exec(text) !== null) {
    total += 1;
  }
  return total;
}

/**
 * Tells whether the text contains more adverb matches than MAX_ADVERBS allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function adverbWarning(text: string) {
  const adverbTotal = countAdverbs(text);
  return adverbTotal > MAX_ADVERBS;
}

// Repetitions
/**
 * Finds the words of a paragraph that share a stem with at least one other word.
 *
 * Words are runs of characters matching `[A-zÀ-ÿ0-9]`. Words found in `stopWords`
 * (compared in lower case) are ignored; the others are reduced to their stem with
 * the French Snowball stemmer and grouped by stem.
 *
 * @param paragraph Text of a single paragraph.
 * @returns One entry per stem seen more than once: `key` is the stem and `occ`
 *   lists its occurrences in reading order. In each occurrence `start` is the
 *   offset of the word and `length` is, despite its name, the END offset
 *   (`start + word.length`); this is kept as is since callers may rely on it.
 */
export function findRepetitionsInParagraph(paragraph: string):
  { key: string, occ: { start: number, length: number }[] }[] {
  const stemmer = new Snowball('French');
  const wordRegex = /[A-zÀ-ÿ0-9]+/g;

  // Prototype-less dictionary: with a plain `{}`, a stem such as "constructor"
  // would read the inherited Object.prototype member (truthy, but not an array)
  // and the `.push` below would throw.
  const stems: { [key: string]: { start: number, length: number }[] } = Object.create(null);

  let matchArr: RegExpExecArray | null;
  while ((matchArr = wordRegex.exec(paragraph)) !== null) {
    const word = matchArr[0];
    if (stopWords.indexOf(word.toLowerCase()) !== -1) {
      continue;
    }

    stemmer.setCurrent(word);
    stemmer.stem();
    const stem: string = stemmer.getCurrent();

    const start = matchArr.index;
    const occurrence = { start, length: start + word.length };
    const occurrences = stems[stem];
    if (occurrences) {
      occurrences.push(occurrence);
    } else {
      stems[stem] = [occurrence];
    }
  }

  // Keep only the stems that occur at least twice.
  const repetitions: { key: string, occ: { start: number, length: number }[] }[] = [];
  for (const key in stems) {
    const occ = stems[key];
    if (occ.length > 1) {
      repetitions.push({ key, occ });
    }
  }

  return repetitions;
}

/**
 * Sums, over every paragraph of the text, the number of repeated stems
 * reported by findRepetitionsInParagraph.
 *
 * Paragraphs are the pieces obtained by splitting the text on blank lines (`\n\n`).
 *
 * @param text Text to scan.
 * @returns Total number of repeated stems.
 */
function countRepetitionsInText(text: string): number {
  let total = 0;
  for (const paragraph of text.split('\n\n')) {
    total += findRepetitionsInParagraph(paragraph).length;
  }
  return total;
}

/**
 * Tells whether the text contains more repeated stems than MAX_REPETITIONS allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function repetitionWarning(text: string) {
  const repetitionTotal = countRepetitionsInText(text);
  return repetitionTotal > MAX_REPETITIONS;
}

/**
 * Counts the punctuation spacing errors of a text.
 *
 * An error is any non-overlapping match of `punctationMissingSpaceRegex` or
 * `punctationExtraSpaceRegex`.
 *
 * @param text Text to scan.
 * @returns Number of matches.
 */
function countPunctuationErrors(text: string): number {
  const regex = new RegExp(punctationMissingSpaceRegex + '|' + punctationExtraSpaceRegex, 'g');
  let countErrors = 0;
  // Only the number of matches matters; the matches themselves are not used
  // (and no longer dumped to the console).
  while (regex.exec(text) !== null) {
    countErrors = countErrors + 1;
  }
  return countErrors;
}

/**
 * Tells whether the text contains more punctuation spacing errors than
 * MAX_PUNCTUATION_ERRORS allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function punctuationWarning(text: string) {
  const errorTotal = countPunctuationErrors(text);
  return errorTotal > MAX_PUNCTUATION_ERRORS;
}

/* export function updateSpellcheckErrors(text?: string): Promise<any> {
  return spe.updateSpellcheckErrors(text);
}

export function getSpellcheckErrors(text?: string) {
  return spe.spellcheckErrors;
} */

// Passive
/**
 * Builds the global, case-insensitive expression used to spot passive constructions.
 *
 * The pattern is: a delimiter (start of text or characters outside
 * `a-zA-Z0-9À-ÿ`), an entry of `etreConjug`, a space, a word containing an
 * entry of `participePasse`, a space, an entry of `preposition`, and a closing
 * delimiter (end of text or characters outside `a-zA-Z0-9À-ÿ`).
 * Group 1 holds the auxiliary and the participle word, group 2 the auxiliary,
 * group 3 the participle entry and group 4 the preposition.
 *
 * @returns A fresh RegExp (its own `lastIndex`) on every call.
 */
export function getPassiveRegexp(): RegExp {
  const auxiliaries = etreConjug.join('|');
  const participles = participePasse.join('|');
  const prepositions = preposition.join('|');
  const pattern =
    '(?:^|[^a-zA-Z0-9À-ÿ]+)' +
    '((' + auxiliaries + ') [A-zÀ-ÿ0-9]*(' + participles + ')(?:^|[A-zÀ-ÿ0-9]*))' +
    ' (' + prepositions + ')' +
    '(?:$|[^a-zA-Z0-9À-ÿ]+)';
  return new RegExp(pattern, 'gi');
}

/**
 * Counts the non-overlapping matches of getPassiveRegexp in a text.
 *
 * @param text Text to scan.
 * @returns Number of matches.
 */
function countPassive(text: string): number {
  const passivePattern = getPassiveRegexp();
  let total = 0;
  while (passivePattern.exec(text) !== null) {
    total += 1;
  }
  return total;
}

/**
 * Tells whether the text contains more passive matches than MAX_PASSIVE allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function passiveWarning(text: string) {
  const passiveTotal = countPassive(text);
  return passiveTotal > MAX_PASSIVE;
}

// Negative
/**
 * Builds the global, case-insensitive expression used to spot negative constructions.
 *
 * The pattern is: a delimiter (start of text or characters outside
 * `a-zA-Z0-9À-ÿ`), `n'` or `ne`, 1 to 15 arbitrary characters, a space, one of
 * the listed negation words, and a closing delimiter (end of text or
 * characters outside `a-zA-Z0-9À-ÿ`).
 *
 * @returns A fresh RegExp (its own `lastIndex`) on every call.
 */
export function getNegativeRegexp(): RegExp {
  const leadingDelimiter = '(?:^|[^a-zA-Z0-9À-ÿ]+)';
  const opening = "(n'|ne)";
  const gap = '(.){1,15} ';
  const closing = '(pas|jamais|plus|personne|rien|aucune|aucun|ni)';
  const trailingDelimiter = '(?:$|[^a-zA-Z0-9À-ÿ]+)';
  return new RegExp(leadingDelimiter + opening + gap + closing + trailingDelimiter, 'gi');
}

/**
 * Counts the non-overlapping matches of getNegativeRegexp in a text.
 *
 * @param text Text to scan.
 * @returns Number of matches.
 */
function countNegative(text: string): number {
  const negativePattern = getNegativeRegexp();
  let total = 0;
  while (negativePattern.exec(text) !== null) {
    total += 1;
  }
  return total;
}

/**
 * Tells whether the text contains more negative matches than MAX_NEGATIVE allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function negativeWarning(text: string) {
  const negativeTotal = countNegative(text);
  return negativeTotal > MAX_NEGATIVE;
}

// Verbes ternes
/**
 * Builds the global, case-insensitive expression matching an entry of
 * `verbesTernes` followed by a space and the next word.
 *
 * The entry must be preceded by the start of the text or by characters outside
 * `a-zA-Z0-9À-ÿ`. Group 1 holds the entry, the space and the following run of
 * `[A-zÀ-ÿ0-9]` characters (possibly empty); group 2 holds the entry alone.
 *
 * @returns A fresh RegExp (its own `lastIndex`) on every call.
 */
export function getVerbesTernesRegexp(): RegExp {
  const leadingDelimiter = '(?:^|[^a-zA-Z0-9À-ÿ]+)';
  const alternatives = verbesTernes.join('|');
  return new RegExp(leadingDelimiter + '((' + alternatives + ') [A-zÀ-ÿ0-9]*)', 'gi');
}

/**
 * Builds the (case-sensitive, non-global) expression matching a string that
 * ends with one of the `participePasse` entries.
 *
 * @returns A fresh RegExp on every call.
 */
function endingWithParticipePasse(): RegExp {
  const endings = participePasse.join('|');
  return new RegExp('.*(' + endings + ')$');
}

/**
 * Counts the "verbes ternes" of a text.
 *
 * A match of getVerbesTernesRegexp is counted unless the word that follows the
 * verb ends like a past participle (see endingWithParticipePasse).
 *
 * @param text Text to scan.
 * @returns Number of counted matches.
 */
function countVerbesTernes(text: string): number {
  const regex = getVerbesTernesRegexp();
  const regexPPasse = endingWithParticipePasse();
  let count = 0;
  let matchArr: RegExpExecArray | null;
  while ((matchArr = regex.exec(text)) !== null) {
    // Group 1 is "<verb> <following word>" and group 2 is the verb alone.
    // The whole match cannot be split on spaces to find the following word: it
    // also contains the leading delimiter, which is usually a space, so the
    // second piece would be the verb itself for every match that does not
    // start the text.
    const followingWord = matchArr[1].slice(matchArr[2].length + 1);
    if (!regexPPasse.test(followingWord)) {
      count = count + 1;
    }
  }
  return count;
}

/**
 * Tells whether the text contains more "verbes ternes" than MAX_VERBES_TERNES allows.
 *
 * @param text Text to check.
 * @returns true when the limit is exceeded.
 */
export function verbesTerneWarning(text: string) {
  const terneTotal = countVerbesTernes(text);
  return terneTotal > MAX_VERBES_TERNES;
}

/**
 * Maps a lexical score to its display label.
 *
 * @param lexical The score to classify.
 * @returns 'Haute' above 0.75, 'Moyenne' above 0.5, 'Basse' otherwise
 *   (including for NaN).
 */
export function getLexicalLabelForValue(lexical: number): string {
  if (lexical > 0.75) {
    return 'Haute';
  }
  return lexical > 0.5 ? 'Moyenne' : 'Basse';
}

/**
 * Computes length statistics from the HTML of a document.
 *
 * Every `<p…>…</p>` or `<ul…>…</ul>` match is a paragraph. Its markup is stripped,
 * then its text is cut into sentences (runs of characters other than `.`, `!`,
 * `?` closed by at least one of those marks) and each sentence into words (runs
 * of `[A-zÀ-ÿ0-9]`). Elapsed time is reported through console.time('length').
 *
 * @param textHtml HTML to analyse.
 * @returns `avgSentencesPerParagraph` and `avgWordsPerSentence` rounded to the
 *   nearest integer, `avgCharactersPerWord` rounded to one decimal; each is 0
 *   when there is nothing to average.
 */
export function getLengthStatistics(textHtml: string): { avgSentencesPerParagraph: number, avgWordsPerSentence: number, avgCharactersPerWord: number } {
  console.time('length');
  const regexParagraphs = /<p[^>]*>(.*?)<\/p>|<ul[^>]*>(.*?)<\/ul>/g;
  const regexSentences = /[^\.!\?]+[\.!\?]+/g;
  const regexWords = /[A-zÀ-ÿ0-9]+/g;

  const paragraphsSentences: number[] = [];
  const wordsSentences: number[] = [];
  const characterWords: number[] = [];

  let matchArrParagraphs: RegExpExecArray | null;
  while ((matchArrParagraphs = regexParagraphs.exec(textHtml)) !== null) {
    // Strip ALL markup. With sanitize-html, `false` means "allow everything",
    // which left the tags in place and made tag and attribute names count as
    // words; an empty whitelist is what removes them.
    const paragraph: string = sanitizeHtml(matchArrParagraphs[0], {
      allowedTags: [],
      allowedAttributes: {}
    });

    // The inner regexes can be shared across iterations: exec() resets
    // lastIndex to 0 once it has returned null.
    let sentences = 0;
    let matchArrSentences: RegExpExecArray | null;
    while ((matchArrSentences = regexSentences.exec(paragraph)) !== null) {
      sentences = sentences + 1;

      let words = 0;
      let matchArrWords: RegExpExecArray | null;
      while ((matchArrWords = regexWords.exec(matchArrSentences[0])) !== null) {
        words = words + 1;
        characterWords.push(matchArrWords[0].length);
      }
      wordsSentences.push(words);
    }
    paragraphsSentences.push(sentences);
  }

  // Arithmetic mean, 0 for an empty list.
  const average = (values: number[]): number =>
    values.length > 0 ? values.reduce((a, b) => a + b, 0) / values.length : 0;

  const avgSentencesPerParagraph = Math.round(average(paragraphsSentences));
  const avgWordsPerSentence = Math.round(average(wordsSentences));
  const avgCharactersPerWord = Math.round(average(characterWords) * 10) / 10;

  console.timeEnd('length');
  return {
    avgSentencesPerParagraph: avgSentencesPerParagraph,
    avgWordsPerSentence: avgWordsPerSentence,
    avgCharactersPerWord: avgCharactersPerWord
  };
}

/**
 * Computes the share of the text that belongs to dialogues.
 *
 * A dialogue is the content of an `<li>` element, or of a `<p>` element whose
 * content starts with a dash (`—`, `–` or `-`). Markup found inside a dialogue
 * is stripped before its words are counted with countWords. Elapsed time is
 * reported through console.time('dialogueRatio').
 *
 * @param rawText Plain text of the document, used for the total word count.
 * @param textHtml HTML of the document, in which dialogues are searched.
 * @returns Percentage (rounded to the nearest integer) of dialogue words over
 *   the words of `rawText`; 0 when `rawText` has no words.
 */
export function analyzeDialogues(rawText: string, textHtml: string): number {
  console.time('dialogueRatio');
  const totalWords = countWords(rawText);

  const regex = /(<li[^>]*>|<p[^>]*>[—|–|-])(.*?)<\/(li|p)>/g;
  let countWordsInsideDialogues = 0;
  let matchArr: RegExpExecArray | null;
  while ((matchArr = regex.exec(textHtml)) !== null) {
    // Strip ALL markup. With sanitize-html, `false` means "allow everything",
    // so nested tags were kept; an empty whitelist is what removes them.
    const wordsInsideDialogue: string = sanitizeHtml(matchArr[2], {
      allowedTags: [],
      allowedAttributes: {}
    });

    countWordsInsideDialogues = countWordsInsideDialogues + countWords(wordsInsideDialogue);
  }

  console.timeEnd('dialogueRatio');
  // Avoid a division by zero (NaN) on an empty text.
  if (!(totalWords > 0)) {
    return 0;
  }
  return Math.round(countWordsInsideDialogues / totalWords * 100);
}

/**
 * Computes a lexical score: log(distinct lemmas) / log(words), rounded to two
 * decimals. Elapsed time is reported through console.time('lexical').
 *
 * @param text Text to analyse.
 * @returns The score, or 0 when it cannot be computed (fewer than two words, or
 *   no lemma), where the formula would otherwise yield NaN or -Infinity.
 */
export function analyseLexical(text: string): number {
  console.time('lexical');
  const words = countWords(text);
  const lemmes = countDistinctLemmes(text);
  console.timeEnd('lexical');

  // log(0) is -Infinity and log(1) is 0: both make the ratio meaningless.
  if (!(words > 1) || !(lemmes >= 1)) {
    return 0;
  }
  return Math.round(Math.log(lemmes) / Math.log(words) * 100) / 100;
}

/**
 * Runs the NLP analysis of a text. Elapsed time is reported through
 * console.time('analyseNlp').
 *
 * @param text Plain text to analyse.
 * @returns
 *   - `sentences`: number of sentences (runs of characters other than `.`, `!`,
 *     `?` closed by at least one of those marks) and how many of them contain a
 *     negative match, a passive match, a "verbe terne", or no word tagged as a verb;
 *   - `pos`: number of tagged items whose `pos` contains 'NOM', 'ADJ', 'VER', 'ADV';
 *   - `topWords`: the 50 most frequent non-stop lemmas longer than 3 characters,
 *     most frequent first.
 */
export function analyseNlp(text: string): {
  sentences: { total: number, negatives: number, passives: number, ternes: number, nonVerbales: number },
  pos: { names: number, adjectives: number, verbs: number, adverbs: number },
  topWords: { word: string, count: number }[]
} {
  console.time('analyseNlp');

  const nlpConf = {
    strictness: true,
    tagTypes: [
      { name: 'adj', minimumLength: 3 },
      { name: 'adv', minimumLength: 3 },
      { name: 'nom', minimumLength: 3 },
      { name: 'ver', minimumLength: 0 },
    ]
  };
  const nlpToolsFr = new NlpjsTFr(text, nlpConf);

  // Part-of-speech counts.
  const tagging = nlpToolsFr.posTagger();
  const names = tagging.filter((tag: any) => tag.pos.indexOf('NOM') !== -1).length;
  const adjectives = tagging.filter((tag: any) => tag.pos.indexOf('ADJ') !== -1).length;
  // The verb words themselves are kept: they are needed to detect verbless sentences.
  const verbs = tagging.filter((tag: any) => tag.pos.indexOf('VER') !== -1).map((tag: any) => tag.word);
  const adverbs = tagging.filter((tag: any) => tag.pos.indexOf('ADV') !== -1).length;

  // Top words.
  const lemming = nlpToolsFr.lemmatizer()
    .filter((l: any) => !l.stop)
    .map((l: any) => l.lemma)
    .filter((l: string) => l.length > 3);

  // Prototype-less dictionary: with a plain `{}`, a lemma such as "constructor"
  // would read the inherited Object.prototype member and produce a string
  // "count", which in turn breaks the numeric sort below.
  const occurrences: { [word: string]: number } = Object.create(null);
  for (let i = 0; i < lemming.length; i = i + 1) {
    const word = lemming[i];
    occurrences[word] = (occurrences[word] || 0) + 1;
  }

  const topWordsList: { word: string, count: number }[] = [];
  for (const key in occurrences) {
    topWordsList.push({ word: key, count: occurrences[key] });
  }

  // Sentence classification.
  let total = 0;
  let negatives = 0;
  let passives = 0;
  let ternes = 0;
  let nonVerbales = 0;

  const regex = /[^\.!\?]+[\.!\?]+/g;
  let matchArr: RegExpExecArray | null;
  while ((matchArr = regex.exec(text)) !== null) {
    const sentence = matchArr[0];

    const isNegative = countNegative(sentence) > 0;
    const isPassive = countPassive(sentence) > 0;
    const isTerne = countVerbesTernes(sentence) > 0;

    // A sentence is verbless when none of its words (lower-cased) is among the
    // words tagged as verbs.
    const isNonVerbale = getWords(sentence).every((word: string) => {
      return verbs.indexOf(word.toLowerCase()) === -1;
    });

    total = total + 1;
    if (isNegative) {
      negatives = negatives + 1;
    }
    if (isPassive) {
      passives = passives + 1;
    }
    if (isTerne) {
      ternes = ternes + 1;
    }
    if (isNonVerbale) {
      nonVerbales = nonVerbales + 1;
    }
  }

  const sentences = {
    total: total,
    negatives: negatives,
    passives: passives,
    ternes: ternes,
    nonVerbales: nonVerbales
  };

  const pos = {
    names: names,
    adjectives: adjectives,
    verbs: verbs.length,
    adverbs: adverbs
  };

  const topWords = topWordsList.sort((a, b) => b.count - a.count).slice(0, 50);

  console.timeEnd('analyseNlp');
  return {
    sentences: sentences,
    pos: pos,
    topWords: topWords
  };
}
