import { getUnitStrings } from '../units';
import { Token, TokenType } from './token';

/**
 * Outcome of a single tokenizer attempt: how many characters were consumed
 * from the input and the token that was produced, or `null` when no token
 * was produced.
 */
type TokenResult = [length: number, token: Token | null];
/**
 * A tokenizer inspects `input` starting at index `current` and reports what,
 * if anything, it recognized there.
 */
type Tokenizer = (input: string, current: number) => TokenResult;

/**
 * Matches one exact character at a given position.
 *
 * @param type - Token type to assign when the character matches.
 * @param value - The character to look for; it also becomes the token value.
 * @param input - Text being tokenized.
 * @param current - Index in `input` to inspect.
 * @returns `[1, token]` when `input[current]` equals `value`, otherwise `[0, null]`.
 */
const tokenizeCharacter = (type: TokenType, value: string, input: string, current: number): TokenResult => {
  if (input[current] !== value) {
    return [0, null];
  }

  return [1, { type, value }];
};

/**
 * Consumes the longest run of consecutive characters, starting at `current`,
 * that each individually match `pattern`.
 *
 * @param type - Token type to assign to the produced token.
 * @param pattern - Regular expression tested against one character at a time.
 *   It is called repeatedly via `test`, so it should not carry the `g` or `y`
 *   flag (those make `test` stateful through `lastIndex`).
 * @param input - Text being tokenized.
 * @param current - Index in `input` at which matching starts.
 * @returns `[n, token]` where `n` is the number of characters consumed and
 *   `token.value` is the matched run, or `[0, null]` when the character at
 *   `current` does not match or `current` is outside `input`.
 */
const tokenizePattern = (type: TokenType, pattern: RegExp, input: string, current: number): TokenResult => {
  let consumedChars = 0;
  let char = input[current];

  // Indexing past the end of a string yields `undefined`. `RegExp.test`
  // would coerce that to the text "undefined", which patterns such as /\w/
  // happily match, so the bounds check has to come before the pattern test.
  while (char !== undefined && pattern.test(char)) {
    consumedChars++;
    char = input[current + consumedChars];
  }

  if (consumedChars === 0) {
    return [0, null];
  }

  return [consumedChars, { type, value: input.slice(current, current + consumedChars) }];
};

/**
 * Runs `tokenizePattern` and accepts its result only when the matched text,
 * lower-cased, is one of the entries in `list`.
 *
 * @param list - Accepted values; compared against the lower-cased match.
 * @param type - Token type to assign to the produced token.
 * @param pattern - Per-character pattern handed to `tokenizePattern`.
 * @param input - Text being tokenized.
 * @param current - Index in `input` at which matching starts.
 * @returns The `tokenizePattern` result when the match is listed, otherwise `[0, null]`.
 */
const tokenizePatternInList = (list: string[], type: TokenType, pattern: RegExp, input: string, current: number): TokenResult => {
  const [length, token] = tokenizePattern(type, pattern, input, current);

  // A missing token is looked up as the empty string.
  const candidate = token?.value.toLowerCase() ?? '';
  if (!list.includes(candidate)) {
    return [0, null];
  }

  return [length, token];
};

/** Consumes a run of whitespace characters as a single 'whitespace' token. */
const tokenizeWhitespace: Tokenizer = (text, position) => {
  return tokenizePattern('whitespace', /\s/, text, position);
};

/** Consumes a single '(' as a 'paren' token. */
const tokenizeOpenParen: Tokenizer = (text, position) => {
  return tokenizeCharacter('paren', '(', text, position);
};

/** Consumes a single ')' as a 'paren' token. */
const tokenizeCloseParen: Tokenizer = (text, position) => {
  return tokenizeCharacter('paren', ')', text, position);
};

/** Consumes a run of one or more commas as a single 'comma' token. */
const tokenizeComma: Tokenizer = (text, position) => {
  return tokenizePattern('comma', /[,]/, text, position);
};

/** Consumes a run of ASCII digits as a single 'number' token. */
const tokenizeNumber: Tokenizer = (text, position) => {
  return tokenizePattern('number', /[0-9]/, text, position);
};

/**
 * Consumes everything up to the next parenthesis or comma as a single
 * 'ingredient' token; whitespace and digits are part of the run.
 */
const tokenizeIngredient: Tokenizer = (text, position) => {
  return tokenizePattern('ingredient', /[^(),]/, text, position);
};

/**
 * Recognizes a word (run of `\w` characters) that is a known unit string.
 *
 * The word is accepted when its lower-cased form appears in the
 * `insensitive` list from `getUnitStrings()`, or when it appears verbatim
 * in the `sensitive` list.
 *
 * @returns The consumed length and a 'whole unit' token, or `[0, null]`.
 */
const tokenizeUnit: Tokenizer = (input, current) => {
  const [length, token] = tokenizePattern('whole unit', /\w/, input, current);
  const { insensitive, sensitive } = getUnitStrings();

  // A missing token is looked up as the empty string in both lists.
  const exact = token?.value ?? '';
  const folded = token?.value.toLowerCase() ?? '';

  const isKnownUnit = insensitive.includes(folded) || sensitive.includes(exact);
  return isKnownUnit ? [length, token] : [0, null];
};

/**
 * Recognizes a word (run of `\w` characters) that is one space-separated
 * piece of a unit string containing a space.
 *
 * Only the `insensitive` list from `getUnitStrings()` is consulted, and the
 * word is compared in lower case.
 *
 * @returns The consumed length and a 'unit fragment' token, or `[0, null]`.
 */
const tokenizeUnitFragment: Tokenizer = (input, current) => {
  const [length, token] = tokenizePattern('unit fragment', /\w/, input, current);
  const { insensitive } = getUnitStrings();

  // Every individual word taken from the unit strings that contain a space.
  const fragments = insensitive
    .filter(unit => unit.includes(' '))
    .flatMap(unit => unit.split(' '));
  const word = token?.value.toLowerCase() ?? '';

  return fragments.includes(word) ? [length, token] : [0, null];
};

/** Words recognized as preparation tokens (matched case-insensitively). */
const preparations = ['chopped', 'cubed', 'sliced'];

/** Recognizes a run of `\w` characters that is one of `preparations`. */
const tokenizePreparation: Tokenizer = (text, position) => {
  return tokenizePatternInList(preparations, 'preparation', /\w/, text, position);
};

/** Words recognized as size tokens (matched case-insensitively). */
const sizes = ['small', 'medium', 'large'];

/**
 * Recognizes a run of non-whitespace characters that is one of `sizes`.
 * The whole run must equal the word, so adjacent punctuation prevents a match.
 */
const tokenizeSize: Tokenizer = (text, position) => {
  return tokenizePatternInList(sizes, 'size', /\S/, text, position);
};

/** Strings recognized as cross tokens (matched case-insensitively). */
const crosses = ['x'];

/**
 * Recognizes a run of non-whitespace characters that is one of `crosses`.
 * The whole run must equal the entry, so an "x" inside a longer word is not matched.
 */
const tokenizeCross: Tokenizer = (text, position) => {
  return tokenizePatternInList(crosses, 'cross', /\S/, text, position);
};

/** Words recognized as preposition tokens (matched case-insensitively). */
const prepositions = ['of'];

/** Recognizes a run of non-whitespace characters that is one of `prepositions`. */
const tokenizePreposition: Tokenizer = (text, position) => {
  return tokenizePatternInList(prepositions, 'preposition', /\S/, text, position);
};

/**
 * Tokenizers in priority order. `tokenize` tries them from top to bottom at
 * each position and uses the first one that consumes any characters, so
 * earlier entries win over later ones.
 */
const tokenizers: Tokenizer[] = [
  tokenizeWhitespace,
  tokenizeOpenParen,
  tokenizeCloseParen,
  tokenizeComma,
  tokenizeCross,
  tokenizePreposition,
  tokenizeNumber,
  tokenizeSize,
  tokenizeUnit,
  tokenizeUnitFragment,
  tokenizePreparation,
  // Must stay last: it accepts any character other than '(', ')' and ',',
  // so it would swallow input the more specific tokenizers should see.
  tokenizeIngredient,
];

/**
 * Splits `input` into tokens by repeatedly applying the entries of
 * `tokenizers` at the current position.
 *
 * At each position the tokenizers are tried in order; the first one that
 * consumes at least one character advances the position and ends the round.
 *
 * @param input - Text to tokenize.
 * @returns The tokens in the order they were produced.
 * @throws Error when no tokenizer consumes anything at some position.
 */
export const tokenize = (input: string): Token[] => {
  const tokens: Token[] = [];
  let position = 0;

  while (position < input.length) {
    let advanced = false;

    for (const tokenizer of tokenizers) {
      const [length, token] = tokenizer(input, position);

      if (token) {
        tokens.push(token);
      }

      if (length !== 0) {
        position += length;
        advanced = true;
        break;
      }
    }

    if (!advanced) {
      throw new Error(`Unrecognized sequence at ${position}`);
    }
  }

  return tokens;
};