import * as sdk from "microsoft-cognitiveservices-speech-sdk";

/**
 * Text plus timing information tagged with a slide index.
 *
 * `LiveTranscription` stores one of these per finalized recognition result, and
 * `TranscriptionSlide` reuses the same fields for per-slide aggregates.
 */
type Offset = {
  /** Recognized text. */
  text: string;
  /**
   * Start position. For recognition results this is the SDK offset divided by
   * 10_000 and then 1000, rounded to two decimals — presumably seconds
   * (TODO confirm the SDK's tick unit).
   */
  offset: number;
  /** Length, converted and rounded the same way as `offset`. */
  duration: number;
  /** Zero-based slide index the entry belongs to. */
  slide: number;
};
/**
 * Aggregated transcript and statistics for a single slide, as produced by
 * `LiveTranscription.stop()`.
 */
export type TranscriptionSlide = Offset & {
  /** Words per minute spoken on this slide (word count divided by duration in minutes). */
  wpm: number;
  /** Optional image for the slide; not populated by `LiveTranscription` itself. */
  image?: string;
  /** `[word, count]` pairs for the slide's text, most frequent first. (Spelling kept: public field name.) */
  occurrances: [string, number][];
  // The fields below are optional and are not set by `LiveTranscription`;
  // presumably filled in by a separate analysis step — verify against callers.
  avgPitch?: number;
  pausesMade?: number;
  totalPauseTime?: number;
  avgPauseLength?: number;
};

/**
 * Live speech-to-text session built on the Azure Speech SDK.
 *
 * Finalized recognition results are tagged with the slide that was active when
 * they arrived; `stop()` folds them into one `TranscriptionSlide` per slide
 * (text, duration, words per minute and word frequencies).
 *
 * Typical flow: `start()` → `addSlide()` whenever the presenter advances →
 * `stop()`.
 */
export class LiveTranscription {
  /**
   * Matches one "word": a run of non-whitespace characters that starts and ends
   * with a letter, combining mark, digit or underscore. For ASCII text this is
   * equivalent to `/\b[^\s]+\b/g`, but it also keeps non-ASCII letters
   * (e.g. "café") intact.
   */
  private static readonly WORD_PATTERN =
    /[\p{L}\p{M}\p{N}_](?:\S*[\p{L}\p{M}\p{N}_])?/gu;

  // public data
  /** Zero-based index of the slide that new recognition results are attributed to. */
  slide = 0;
  /** Transcript so far: all finalized results plus the current interim hypothesis. */
  text = "";
  /** `true` once continuous recognition has started, `false` after `stop()` completes. */
  running = false;
  /** Result of the most recent `stop()`; cleared by `start()`. */
  slides: TranscriptionSlide[] = [];

  // private data
  private offsets: Offset[] = [];
  private recognizer: sdk.SpeechRecognizer | null = null;
  private onDoneCallback: () => void = () => {};

  /**
   * @param cb Invoked (without arguments) every time `stop()` has finished
   *   building the slide summaries.
   */
  constructor(cb: () => void = () => {}) {
    this.onDoneCallback = cb;
  }

  // public methods
  /**
   * Starts a fresh transcription session.
   *
   * @param stream Audio stream to transcribe; takes precedence over `device`.
   * @param lang   Recognition language (BCP-47 tag).
   * @param device Microphone device id; the default microphone is used when
   *   neither `stream` nor `device` is given.
   */
  start(stream?: MediaStream, lang = "en-US", device?: string): void {
    // A recognizer left over from an earlier start() would otherwise be leaked.
    this.recognizer?.close();

    // Per-session state. The slide counter is reset together with the collected
    // results so a second session does not begin on a stale slide index.
    this.offsets = [];
    this.slides = [];
    this.slide = 0;

    const recognizer = this.setup(stream, lang, device);
    this.recognizer = recognizer;

    // Handlers are attached before recognition starts so no early event is missed.
    let lastRecognized = "";
    recognizer.recognizing = recognizer.recognized = (_s, e) => {
      if (!e.result.text) return;

      if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
        // Interim hypothesis: show it after the finalized text, but do not store it.
        this.text = lastRecognized + e.result.text;
        return;
      }

      lastRecognized += e.result.text + "\r\n";
      this.text = lastRecognized;
      this.offsets.push({
        text: e.result.text,
        duration: LiveTranscription.ticksToSeconds(e.result.duration),
        offset: LiveTranscription.ticksToSeconds(e.result.offset),
        slide: this.slide,
      });
    };

    recognizer.startContinuousRecognitionAsync(() => (this.running = true));
  }

  /**
   * Stops recognition, releases the recognizer and builds the slide summaries.
   *
   * @returns The per-slide summaries (also stored in `slides`). When no
   *   recognizer is active the promise resolves immediately with the current
   *   `slides` instead of staying pending forever.
   * @throws Rejects with an `Error` when the SDK reports that stopping failed.
   */
  stop(): Promise<TranscriptionSlide[]> {
    return new Promise<TranscriptionSlide[]>((res, rej) => {
      const recognizer = this.recognizer;
      if (!recognizer) {
        res(this.slides);
        return;
      }

      recognizer.stopContinuousRecognitionAsync(
        () => {
          this.running = false;
          recognizer.close();
          this.recognizer = null;
          const slides = this.createSlides();
          this.slides = slides;
          this.onDoneCallback();
          res(slides);
        },
        err => rej(new Error(String(err))),
      );
    });
  }

  /** Stops the session when it is running, otherwise starts one with default settings. */
  toggle(): void {
    if (this.running) {
      // Fire-and-forget: surface a failed stop instead of leaving an unhandled rejection.
      void this.stop().catch((err: unknown) => {
        console.error("LiveTranscription: failed to stop recognition", err);
      });
    } else {
      this.start();
    }
  }

  /**
   * Advances to the next slide. Recognition is stopped and restarted around the
   * change, and `slide` is incremented only after the restart has completed.
   * Does nothing when no recognizer is active.
   */
  addSlide(): void {
    this.recognizer?.stopContinuousRecognitionAsync(() => {
      this.recognizer?.startContinuousRecognitionAsync(() => {
        this.slide += 1;
      });
    });
  }

  /** Suspends recognition but keeps the recognizer; `running` is left unchanged. */
  pause(): void {
    this.recognizer?.stopContinuousRecognitionAsync();
  }

  /** Resumes recognition after `pause()`; `running` is left unchanged. */
  resume(): void {
    this.recognizer?.startContinuousRecognitionAsync();
  }

  // private methods
  /** Rounds to two decimal places. */
  private static round2(value: number): number {
    return Math.round(value * 100) / 100;
  }

  /** Converts an SDK tick count (100 ns units) to seconds, rounded to two decimals. */
  private static ticksToSeconds(ticks: number): number {
    return LiveTranscription.round2(ticks / 10_000 / 1000);
  }

  /**
   * Builds a recognizer for the requested audio source and language, configured
   * for dictation with raw profanity and word-level timestamps.
   */
  private setup(stream?: MediaStream, lang = "en-US", device?: string) {
    let audio: sdk.AudioConfig;
    if (stream) audio = sdk.AudioConfig.fromStreamInput(stream);
    else if (device) audio = sdk.AudioConfig.fromMicrophoneInput(device);
    else audio = sdk.AudioConfig.fromDefaultMicrophoneInput();

    // NOTE(review): the subscription key is hard-coded and therefore shipped to
    // every client. It should be rotated and replaced by a short-lived
    // authorization token issued by a backend.
    const speech = sdk.SpeechConfig.fromSubscription(
      "b0f6bcc0dd804035abad1a21af313b0d",
      "westeurope",
    );
    speech.speechRecognitionLanguage = lang;
    speech.setProfanity(sdk.ProfanityOption.Raw);
    speech.requestWordLevelTimestamps();
    speech.enableDictation();
    return new sdk.SpeechRecognizer(speech, audio);
  }

  /**
   * Counts case-insensitive word frequencies in `text`.
   *
   * Only the words themselves are extracted, so punctuation and whitespace
   * between them never end up in the counts.
   *
   * @returns `map`: word → count, ordered by count (descending) and then by word
   *   length (descending); `total`: number of words found.
   */
  private countOccurances(text: string) {
    const words = (text.match(LiveTranscription.WORD_PATTERN) ?? []).map(w =>
      w.toLowerCase(),
    );

    const wordMap = new Map<string, number>();
    for (const word of words) {
      wordMap.set(word, (wordMap.get(word) ?? 0) + 1);
    }

    const map = new Map(
      [...wordMap.entries()].sort(
        (a, b) => b[1] - a[1] || b[0].length - a[0].length,
      ),
    );
    return { map, total: words.length };
  }

  /**
   * Folds the collected utterances into one summary per slide, for every slide
   * index from 0 up to the highest one that received speech.
   *
   * - Returns `[]` when nothing was recognized.
   * - Slides without any speech get an empty entry (duration 0, wpm 0).
   * - A slide's duration is the sum of its utterance durations plus the silent
   *   gaps between consecutive utterances. Negative gaps (offsets that restart
   *   after recognition was restarted) are ignored.
   * - A slide's offset is the accumulated duration of all preceding slides.
   */
  private createSlides(): TranscriptionSlide[] {
    if (this.offsets.length === 0) return [];

    // Bucket utterances by slide, preserving arrival order.
    const bySlide = new Map<number, Offset[]>();
    let lastSlide = 0;
    for (const entry of this.offsets) {
      const bucket = bySlide.get(entry.slide);
      if (bucket) bucket.push(entry);
      else bySlide.set(entry.slide, [entry]);
      lastSlide = Math.max(lastSlide, entry.slide);
    }

    const res: TranscriptionSlide[] = [];
    let startsAt = 0;
    for (let i = 0; i <= lastSlide; i++) {
      const all = bySlide.get(i) ?? [];
      const text = all.map(x => x.text).join(" ");
      const { map, total } = this.countOccurances(text);

      let time = 0;
      all.forEach((current, j) => {
        time += current.duration;
        const next = all[j + 1];
        if (next) {
          const gap = next.offset - (current.offset + current.duration);
          time += Math.max(0, gap);
        }
      });
      time = LiveTranscription.round2(time);

      res.push({
        text,
        slide: i,
        duration: time,
        // Guard against dividing by zero for silent slides.
        wpm: time > 0 ? LiveTranscription.round2(total / (time / 60)) : 0,
        occurrances: [...map.entries()],
        offset: startsAt,
      });
      startsAt = LiveTranscription.round2(startsAt + time);
    }
    return res;
  }
}
