/**
 * Normalizes a string so it can be compared without regard to letter case or
 * surrounding whitespace.
 *
 * @param str - The string to normalize.
 * @returns `str` lower-cased, with leading and trailing whitespace removed.
 */
export function normalizeString(str: string): string {
  const lowerCased = str.toLowerCase()
  return lowerCased.trim()
}

/**
 * Compares two strings while ignoring letter case and surrounding whitespace.
 *
 * @returns `true` when both strings are identical after being passed through `normalizeString`.
 */
export function areNormalizedStringsEqual(str1: string, str2: string): boolean {
  const first = normalizeString(str1)
  const second = normalizeString(str2)
  return first === second
}

/**
 * Builds an English-locale collator for comparing strings that may contain numbers.
 *
 * `numeric: true` makes digit sequences compare by numeric value, so "Branch 15" sorts
 * before "Branch 100" instead of after it. `sensitivity: 'base'` makes strings that
 * differ only by letter case compare as equal.
 *
 * @returns A new `Intl.Collator` configured with the options above.
 */
export function makeNumericCollator(): Intl.Collator {
  const options: Intl.CollatorOptions = { numeric: true, sensitivity: 'base' }
  return new Intl.Collator('en', options)
}

/**
 * Returns a copy of `items` ordered by `internalNumber` using numeric collation, so that
 * e.g. "2" comes before "10". The input array is not modified.
 *
 * Items whose `internalNumber` is `null` or `undefined` are compared as the empty string,
 * which sorts them to the beginning.
 *
 * @param items - The objects to sort.
 * @returns A new, sorted array containing the same objects.
 */
export function sortByInternalNumber<T extends { internalNumber?: string | null }>(
  items: T[]
): T[] {
  const collator = makeNumericCollator()
  // A missing internalNumber is treated as the empty string for comparison purposes
  const sortKey = (item: T): string => item.internalNumber ?? ''
  const sorted = Array.from(items)
  sorted.sort((left, right) => collator.compare(sortKey(left), sortKey(right)))
  return sorted
}

/**
 * Whitespace-like characters other than the regular space, carriage return, and line feed.
 * List picked from no-irregular-whitespace rule implementation
 * https://eslint.org/docs/rules/no-irregular-whitespace
 * `replaceIrregularWhitespaces` and `replaceAllWhitespaces` replace each of these with a
 * regular space.
 */
const irregularWhitespaces = [
  '\u0009', // Character tabulation
  '\u000B', // Line Tabulation (\v) - <VT>
  '\u000C', // Form Feed (\f) - <FF>
  '\u00A0', // No-Break Space - <NBSP>
  '\u0085', // Next Line
  '\u1680', // Ogham Space Mark
  '\u180E', // Mongolian Vowel Separator - <MVS>
  '\ufeff', // Zero Width No-Break Space - <BOM>
  '\u2000', // En Quad
  '\u2001', // Em Quad
  '\u2002', // En Space - <ENSP>
  '\u2003', // Em Space - <EMSP>
  '\u2004', // Three-Per-Em
  '\u2005', // Four-Per-Em
  '\u2006', // Six-Per-Em
  '\u2007', // Figure Space
  '\u2008', // Punctuation Space - <PUNCSP>
  '\u2009', // Thin Space
  '\u200A', // Hair Space
  '\u200B', // Zero Width Space - <ZWSP>
  '\u2028', // Line Separator
  '\u2029', // Paragraph Separator
  '\u202F', // Narrow No-Break Space
  '\u205f', // Medium Mathematical Space
  '\u3000', // Ideographic Space
]

// Line-break characters. Kept apart from the irregular whitespace list so that
// `replaceIrregularWhitespaces` leaves them in place; only `replaceAllWhitespaces`
// replaces them (each with a regular space).
const newLineWhitespaces = [
  '\u000D', // Carriage return - <CR>
  '\u000A', // Line feed - <LF>
]

// Control characters that are removed outright (replaced with the empty string).
// Taken from ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
// Range: \u0000-\u001F and \u007F-\u009F
// Excludes the tab and new-line characters already covered by the lists above
// (\u0009-\u000D and \u0085), so those are handled by those lists instead of being
// removed as control characters.
const controlCharacters = [
  '\u0000', // Null
  '\u0001', // Start of heading
  '\u0002', // Start of text
  '\u0003', // End of text
  '\u0004', // End of transmission
  '\u0005', // Enquiry
  '\u0006', // Acknowledge
  '\u0007', // Bell
  '\u0008', // Backspace
  '\u000E', // Shift out
  '\u000F', // Shift in
  '\u0010', // Data link escape
  '\u0011', // Device control 1
  '\u0012', // Device control 2
  '\u0013', // Device control 3
  '\u0014', // Device control 4
  '\u0015', // Negative acknowledge
  '\u0016', // Synchronous idle
  '\u0017', // End of transmission block
  '\u0018', // Cancel
  '\u0019', // End of medium
  '\u001A', // Substitute
  '\u001B', // Escape
  '\u001C', // Information separator 4
  '\u001D', // Information separator 3
  '\u001E', // Information separator 2
  '\u001F', // Information separator 1
  '\u007F', // DELETE
  '\u0080', // PADDING CHARACTER (alias only; unnamed in UnicodeData.txt)
  '\u0081', // HIGH OCTET PRESET (alias only; unnamed in UnicodeData.txt)
  '\u0082', // BREAK PERMITTED HERE
  '\u0083', // NO BREAK HERE
  '\u0084', // INDEX (alias only; unnamed in UnicodeData.txt)
  '\u0086', // START OF SELECTED AREA
  '\u0087', // END OF SELECTED AREA
  '\u0088', // CHARACTER TABULATION SET
  '\u0089', // CHARACTER TABULATION WITH JUSTIFICATION
  '\u008A', // LINE TABULATION SET
  '\u008B', // PARTIAL LINE FORWARD
  '\u008C', // PARTIAL LINE BACKWARD
  '\u008D', // REVERSE LINE FEED
  '\u008E', // SINGLE SHIFT TWO
  '\u008F', // SINGLE SHIFT THREE
  '\u0090', // DEVICE CONTROL STRING
  '\u0091', // PRIVATE USE ONE
  '\u0092', // PRIVATE USE TWO
  '\u0093', // SET TRANSMIT STATE
  '\u0094', // CANCEL CHARACTER
  '\u0095', // MESSAGE WAITING
  '\u0096', // START OF GUARDED AREA
  '\u0097', // END OF GUARDED AREA
  '\u0098', // START OF STRING
  '\u0099', // SINGLE GRAPHIC CHARACTER INTRODUCER (alias only; unnamed in UnicodeData.txt)
  '\u009A', // SINGLE CHARACTER INTRODUCER
  '\u009B', // CONTROL SEQUENCE INTRODUCER
  '\u009C', // STRING TERMINATOR
  '\u009D', // OPERATING SYSTEM COMMAND
  '\u009E', // PRIVACY MESSAGE
  '\u009F', // APPLICATION PROGRAM COMMAND
]

/**
 * Builds a global regex that matches any single character from `characters`.
 * The characters are inserted verbatim into a character class, so the list must not
 * contain characters that are special inside one (`]`, `\`, `^`, `-`).
 */
const makeCharacterClassRegex = (characters: string[]): RegExp => {
  const characterClass = `[${characters.join('')}]`
  return new RegExp(characterClass, 'g')
}

const irregularWhitespacesRegex = makeCharacterClassRegex(irregularWhitespaces)
const newLineWhitespacesRegex = makeCharacterClassRegex(newLineWhitespaces)
const controlCharactersRegex = makeCharacterClassRegex(controlCharacters)

/**
 * Normalizes irregular whitespace and strips control characters, leaving carriage
 * returns and line feeds untouched.
 *
 * Every character matched by `irregularWhitespacesRegex` becomes a regular space; every
 * character matched by `controlCharactersRegex` is then removed.
 *
 * @param value - The text to clean.
 * @returns The cleaned text.
 */
export function replaceIrregularWhitespaces(value: string): string {
  const withRegularSpaces = value.replace(irregularWhitespacesRegex, ' ')
  return withRegularSpaces.replace(controlCharactersRegex, '')
}

/**
 * Normalizes irregular whitespace, strips control characters, AND replaces carriage
 * returns and line feeds with regular spaces.
 *
 * Each new-line character is replaced individually, so a `\r\n` pair becomes two spaces.
 *
 * @param value - The text to clean.
 * @returns The cleaned text, free of new-line characters.
 */
export function replaceAllWhitespaces(value: string): string {
  // Same first two steps as replaceIrregularWhitespaces; new lines are handled last
  const withoutIrregulars = replaceIrregularWhitespaces(value)
  return withoutIrregulars.replace(newLineWhitespacesRegex, ' ')
}

/**
 * Remove common words/suffixes from a company name so that fuzzy search across multiple companies
 * is more accurate. For instance `ABC Construction` and `XYZ Construction` should not be considered
 * duplicates, but due to the `Construction` suffix, they would be by default.
 *
 * Matching is case-insensitive and limited to whole words, so a name that merely contains one
 * of the terms (e.g. `Lincoln`, which contains `inc`) is left intact.
 *
 * @param name - The raw company name.
 * @returns The name without the common terms, with every run of spaces collapsed to a single
 *   space and leading/trailing whitespace removed.
 */
export function cleanCompanyName(name: string): string {
  // `consruction` is a misspelling of `construction` that is stripped as well
  const commonTerms = /\b(?:construction|consruction|corporation|corp|inc|llc)\b/gi
  return (
    name
      .replace(commonTerms, '')
      // Removing several terms can leave more than two consecutive spaces, so collapse whole runs
      .replace(/ {2,}/g, ' ')
      // Removing a leading or trailing term leaves dangling whitespace
      .trim()
  )
}

/** The minimal user shape needed to build a full name. */
type UserForFullName = {
  firstName: string
  lastName: string
}

/**
 * Builds a user's full name.
 *
 * @param user - An object carrying the user's first and last name.
 * @returns The first name and last name joined by a single space.
 */
export function getFullName(user: UserForFullName): string {
  const { firstName, lastName } = user
  return `${firstName} ${lastName}`
}
