/**
 * Unicode utilities for Scala parser
 * Handles Unicode normalization and character validation
 */

/**
 * Returns the NFC form of a string (canonical decomposition followed by
 * canonical composition), so that canonically equivalent sequences such as
 * "e" + U+0301 and the precomposed U+00E9 end up with the same representation.
 *
 * @param text - The string to normalize
 * @returns The NFC-normalized string
 */
export function normalizeUnicode(text: string): string {
  const composed = text.normalize("NFC");
  return composed;
}

/**
 * Checks whether a string is a single character that may start a Scala identifier.
 *
 * Accepted characters are ASCII letters, `_`, `$`, any code point in the
 * mathematical-operator blocks listed in the body, and any code point in the
 * Unicode general categories L (letters), Mn, Mc (marks) or Pc (connector
 * punctuation). Digits are not accepted as a start character.
 *
 * The input must consist of exactly one Unicode code point. Supplementary-plane
 * characters (for example U+1D4B3), which occupy two UTF-16 code units, count
 * as one character and are classified like any other.
 *
 * @param char - The character to check (exactly one code point)
 * @returns True if the character can start an identifier
 */
export function isIdentifierStart(char: string): boolean {
  const codePoint = char.codePointAt(0);
  if (codePoint === undefined) return false; // empty string

  // Require exactly one code point: one UTF-16 unit inside the BMP, two for a
  // surrogate pair. Checking `length === 1` alone would reject astral letters.
  const expectedLength = codePoint > 0xffff ? 2 : 1;
  if (char.length !== expectedLength) return false;

  // Basic ASCII identifier characters
  if (
    (codePoint >= 0x41 && codePoint <= 0x5a) || // A-Z
    (codePoint >= 0x61 && codePoint <= 0x7a) || // a-z
    codePoint === 0x5f || // _
    codePoint === 0x24 // $
  ) {
    return true;
  }

  // Mathematical symbols range (extended)
  if (
    (codePoint >= 0x2200 && codePoint <= 0x22ff) || // Mathematical Operators
    (codePoint >= 0x27c0 && codePoint <= 0x27ef) || // Miscellaneous Mathematical Symbols-A
    (codePoint >= 0x2980 && codePoint <= 0x29ff) || // Miscellaneous Mathematical Symbols-B
    (codePoint >= 0x2a00 && codePoint <= 0x2aff) // Supplemental Mathematical Operators
  ) {
    return true;
  }

  // Everything else is decided by Unicode general category; digits (Nd) are
  // deliberately absent because an identifier cannot start with one.
  return /^[\p{L}\p{Mn}\p{Mc}\p{Pc}]$/u.test(char);
}

/**
 * Checks whether a string is a single character that may appear after the
 * first character of a Scala identifier.
 *
 * Accepted characters are ASCII letters and digits, `_`, `$`, any code point in
 * the mathematical-operator blocks listed in the body, and any code point in
 * the Unicode general categories L (letters), Mn, Mc (marks), Nd (decimal
 * digits), Pc (connector punctuation) or Cf (format characters).
 *
 * The input must consist of exactly one Unicode code point. Supplementary-plane
 * characters (for example U+1D7CE), which occupy two UTF-16 code units, count
 * as one character and are classified like any other.
 *
 * @param char - The character to check (exactly one code point)
 * @returns True if the character can continue an identifier
 */
export function isIdentifierContinue(char: string): boolean {
  const codePoint = char.codePointAt(0);
  if (codePoint === undefined) return false; // empty string

  // Require exactly one code point: one UTF-16 unit inside the BMP, two for a
  // surrogate pair. Checking `length === 1` alone would reject astral characters.
  const expectedLength = codePoint > 0xffff ? 2 : 1;
  if (char.length !== expectedLength) return false;

  // Basic ASCII identifier characters
  if (
    (codePoint >= 0x41 && codePoint <= 0x5a) || // A-Z
    (codePoint >= 0x61 && codePoint <= 0x7a) || // a-z
    (codePoint >= 0x30 && codePoint <= 0x39) || // 0-9
    codePoint === 0x5f || // _
    codePoint === 0x24 // $
  ) {
    return true;
  }

  // Mathematical symbols range (extended)
  if (
    (codePoint >= 0x2200 && codePoint <= 0x22ff) || // Mathematical Operators
    (codePoint >= 0x27c0 && codePoint <= 0x27ef) || // Miscellaneous Mathematical Symbols-A
    (codePoint >= 0x2980 && codePoint <= 0x29ff) || // Miscellaneous Mathematical Symbols-B
    (codePoint >= 0x2a00 && codePoint <= 0x2aff) // Supplemental Mathematical Operators
  ) {
    return true;
  }

  // Everything else is decided by Unicode general category, including
  // decimal digits (Nd) and format characters (Cf).
  return /^[\p{L}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Cf}]$/u.test(char);
}

/**
 * Validates that a string is a valid Scala identifier.
 *
 * The input is NFC-normalized first, then split into Unicode code points (not
 * UTF-16 code units) so that a supplementary-plane character is handed to the
 * character predicates as one unit rather than as two lone surrogates. The
 * first code point is checked with `isIdentifierStart` and every following one
 * with `isIdentifierContinue`.
 *
 * @param identifier - The identifier string to validate
 * @returns True if the string is non-empty and every character is permitted
 *          in its position
 */
export function isValidIdentifier(identifier: string): boolean {
  if (!identifier) return false;

  // Array.from iterates a string by code point, keeping surrogate pairs intact.
  const [first, ...rest] = Array.from(normalizeUnicode(identifier));

  if (first === undefined || !isIdentifierStart(first)) return false;

  return rest.every((ch) => isIdentifierContinue(ch));
}

/**
 * Converts Unicode escape sequences in a string to the characters they denote.
 *
 * Recognizes a backslash followed by one or more `u` characters and exactly
 * four hexadecimal digits (the escape grammar permits repeated `u`). An escape
 * is decoded only when its backslash is not itself escaped: in the input
 * backslash-backslash-u0041 the two backslashes form an escaped backslash, so
 * the text is left untouched. Escaped backslash pairs and all other escapes
 * are returned unchanged.
 *
 * Each escape yields one UTF-16 code unit, so a supplementary-plane character
 * written as two consecutive surrogate escapes is reassembled correctly.
 *
 * @param text - The text containing Unicode escapes
 * @returns The text with Unicode escapes converted to actual characters
 */
export function processUnicodeEscapes(text: string): string {
  // Scanning left to right, the first alternative consumes an escaped backslash
  // pair so its second backslash can never start a Unicode escape; the second
  // alternative captures the four hex digits of a genuine escape.
  return text.replace(
    /\\(?:\\|u+([0-9A-Fa-f]{4}))/g,
    (match: string, hex: string | undefined) =>
      hex === undefined ? match : String.fromCharCode(parseInt(hex, 16)),
  );
}

/**
 * Optionally rewrites non-ASCII text as \uXXXX escape sequences.
 *
 * When `escapeNonAscii` is false (the default) the input is returned as is.
 * When true, every UTF-16 code unit in the range U+0080..U+FFFF is replaced by
 * a backslash, `u`, and its four-digit upper-case hexadecimal value. Characters
 * outside the BMP are therefore emitted as two escapes, one per surrogate.
 *
 * @param text - The text to escape
 * @param escapeNonAscii - Whether to escape all non-ASCII characters
 * @returns The escaped text
 */
export function escapeUnicode(text: string, escapeNonAscii = false): string {
  if (escapeNonAscii) {
    const toEscape = (unit: string): string => {
      const hex = unit.charCodeAt(0).toString(16).toUpperCase();
      return "\\u" + hex.padStart(4, "0");
    };
    return text.replace(/[\u0080-\uFFFF]/g, (unit) => toEscape(unit));
  }

  return text;
}

/**
 * Extended mathematical symbols commonly used in Scala functional programming.
 *
 * A read-only lookup table (`as const`) mapping a descriptive name to a
 * one-character string. Entries are grouped as Greek letters, operators from
 * the Mathematical Operators block (U+2200..U+22FF), and arrows (U+2190..U+2193).
 * The trailing comment on each entry gives the character's code point.
 *
 * NOTE(review): the arrow code points lie outside the hard-coded mathematical
 * ranges tested by `isIdentifierStart` / `isIdentifierContinue` in this file —
 * confirm whether the arrows are meant to be usable in identifiers.
 */
export const MATHEMATICAL_SYMBOLS = {
  // Greek letters commonly used in functional programming
  ALPHA: "α", // U+03B1
  BETA: "β", // U+03B2
  GAMMA: "γ", // U+03B3
  DELTA: "δ", // U+03B4
  LAMBDA: "λ", // U+03BB
  MU: "μ", // U+03BC
  PI: "π", // U+03C0
  SIGMA: "σ", // U+03C3
  TAU: "τ", // U+03C4
  PHI: "φ", // U+03C6

  // Mathematical operators (quantifiers, set membership and set operations)
  FORALL: "∀", // U+2200
  EXISTS: "∃", // U+2203
  ELEMENT_OF: "∈", // U+2208
  NOT_ELEMENT_OF: "∉", // U+2209
  SUBSET: "⊂", // U+2282
  SUPERSET: "⊃", // U+2283
  UNION: "∪", // U+222A
  INTERSECTION: "∩", // U+2229

  // Arrows (Unicode Arrows block)
  RIGHTWARDS_ARROW: "→", // U+2192
  LEFTWARDS_ARROW: "←", // U+2190
  UP_ARROW: "↑", // U+2191
  DOWN_ARROW: "↓", // U+2193
} as const;