"""
This plugin provides a command to identify hash types using comprehensive pattern matching.
"""
import logging
import re
import simplematrixbotlib as botlib
def identify_hash(hash_string):
    """
    Identify candidate hash types for a string via prefix, structure and length heuristics.

    Checks run from the most specific shape to the least specific one:
    ``$id$`` modular-crypt prefixes, ``{scheme}`` prefixes, the Django prefix,
    fully anchored database formats, generic colon-separated records and
    finally bare hex / Base64 digests classified by length.

    Args:
        hash_string (str): The hash string to identify. Surrounding
            whitespace is ignored.
    Returns:
        list: List of tuples (hash_type, hashcat_mode, john_format, confidence).
            ``hashcat_mode`` and ``john_format`` are strings or None.
            The list is empty when a ``{scheme}`` prefix or a colon-separated
            record is not recognised; every other unrecognised input yields a
            single "Unknown"-style entry. The result is always a list (never
            None) and is not sorted by confidence.
    """
    hash_string = hash_string.strip()

    if hash_string.startswith('$'):
        return [_identify_modular_crypt(hash_string)]
    if hash_string.startswith('{'):
        return _identify_braced_scheme(hash_string)
    # Django's encoding starts with the algorithm name, not with '$', so it
    # has to be tested outside the modular-crypt branch to be reachable.
    if hash_string.startswith('pbkdf2_sha256$'):
        return [("Django PBKDF2-SHA256", "10000", "django", 90)]

    # Anchored formats must be tried before the generic ':' handling: the
    # Oracle patterns contain a colon and would otherwise be swallowed by it.
    for pattern, entry in _ANCHORED_FORMATS:
        if pattern.match(hash_string):
            return [entry]

    if ':' in hash_string:
        return _identify_colon_separated(hash_string.split(':'))

    return _identify_raw_digest(hash_string) or [("Unknown", None, None, 0)]


# NOTE(review): the (name, hashcat mode, john format, confidence) values in the
# tables below are carried over unchanged. Several mode numbers look
# inconsistent with hashcat's published mode list (e.g. the AIX {ssha256} /
# {ssha512}, PostgreSQL MD5, Argon2 and MSSQL entries) - audit before relying
# on them.

# Scheme identifier between the first two '$' -> result entry (case-sensitive).
_MODULAR_CRYPT_TYPES = {
    'y': ("yescrypt", None, "yescrypt", 95),
    '7': ("scrypt", "8900", "scrypt", 95),
    'argon2i': ("Argon2i", "10900", "argon2", 95),
    'argon2d': ("Argon2d", None, "argon2", 95),
    'argon2id': ("Argon2id", "10900", "argon2", 95),
    '6': ("SHA-512 Crypt (Unix)", "1800", "sha512crypt", 95),
    '5': ("SHA-256 Crypt (Unix)", "7400", "sha256crypt", 95),
    '1': ("MD5 Crypt (Unix)", "500", "md5crypt", 95),
    'apr1': ("Apache MD5 (apr1)", "1600", "md5crypt", 95),
    'H': ("phpBB3", "400", "phpass", 90),
    'P': ("Wordpress", "400", "phpass", 90),
    'S': ("Drupal 7+", "7900", "drupal7", 90),
    'wbb3': ("WBB3 (Woltlab)", None, None, 85),
    'pbkdf2-sha256': ("PBKDF2-HMAC-SHA256", "10900", "pbkdf2-hmac-sha256", 90),
    'pbkdf2-sha512': ("PBKDF2-HMAC-SHA512", None, "pbkdf2-hmac-sha512", 90),
}

# bcrypt identifiers; the matched one is echoed in the reported type name.
_BCRYPT_SCHEMES = frozenset({'2', '2a', '2b', '2x', '2y'})

# Lower-cased scheme name inside '{...}' -> result entry.
_BRACED_SCHEMES = {
    'sha': ("LDAP SHA-1", "101", "nsldap", 90),
    'ssha': ("LDAP SSHA (Salted SHA-1)", "111", "nsldaps", 90),
    'md5': ("LDAP MD5", "3210", None, 90),
    'smd5': ("LDAP SMD5 (Salted MD5)", "3211", None, 90),
    'crypt': ("LDAP CRYPT", None, None, 85),
    'ssha256': ("AIX {ssha256}", "6700", None, 90),
    'ssha512': ("AIX {ssha512}", "6800", None, 90),
}

# Hex digits are matched case-insensitively on the original string. Matching an
# upper-cased copy would turn the MSSQL "0x" prefix into "0X" and never match.
_HEX_FLAGS = re.IGNORECASE | re.ASCII

# Formats recognisable from their complete shape, tried in order.
_ANCHORED_FORMATS = (
    (re.compile(r'^md5[a-fA-F0-9]{32}$'),
     ("PostgreSQL MD5", "3100", "postgres", 90)),
    (re.compile(r'^\*[A-F0-9]{40}$', _HEX_FLAGS),
     ("MySQL 4.1/5.x", "300", "mysql-sha1", 95)),
    (re.compile(r'^[A-F0-9]{16}:[A-F0-9]{16}$', _HEX_FLAGS),
     ("Oracle 11g", "112", "oracle11", 90)),
    (re.compile(r'^S:[A-F0-9]{60}$', _HEX_FLAGS),
     ("Oracle 12c/18c", "12300", "oracle12c", 90)),
    (re.compile(r'^0x0100[A-F0-9]{8}[A-F0-9]{40}$', _HEX_FLAGS),
     ("MSSQL 2000", "131", "mssql", 90)),
    (re.compile(r'^0x0200[A-F0-9]{8}[A-F0-9]{128}$', _HEX_FLAGS),
     ("MSSQL 2012/2014", "1731", "mssql12", 90)),
)

# Length of a bare hex string -> candidate digests.
_RAW_HEX_TYPES = {
    16: (
        ("MySQL < 4.1", "200", "mysql", 85),
        ("Half MD5", None, None, 60),
    ),
    32: (
        ("MD5", "0", "raw-md5", 80),
        ("MD4", "900", "raw-md4", 70),
        ("NTLM", "1000", "nt", 75),
        ("LM", "3000", "lm", 60),
        ("RAdmin v2.x", "9900", None, 50),
        ("Snefru-128", None, None, 40),
        ("HMAC-MD5 (key = $pass)", "50", None, 50),
    ),
    40: (
        ("SHA-1", "100", "raw-sha1", 85),
        ("RIPEMD-160", "6000", "ripemd-160", 65),
        ("Tiger-160", None, None, 50),
        ("Haval-160", None, None, 45),
        ("HMAC-SHA1 (key = $pass)", "150", None, 55),
    ),
    48: (
        ("Tiger-192", None, None, 70),
        ("Haval-192", None, None, 65),
    ),
    56: (
        ("SHA-224", "1300", "raw-sha224", 85),
        ("Haval-224", None, None, 60),
    ),
    64: (
        ("SHA-256", "1400", "raw-sha256", 85),
        ("RIPEMD-256", None, None, 60),
        ("SHA3-256", "17400", "raw-sha3", 70),
        ("Keccak-256", "17800", "raw-keccak-256", 70),
        ("Haval-256", None, None, 50),
        ("GOST R 34.11-94", "6900", None, 55),
        ("BLAKE2b-256", None, None, 60),
    ),
    80: (
        ("RIPEMD-320", None, None, 80),
    ),
    96: (
        ("SHA-384", "10800", "raw-sha384", 85),
        ("SHA3-384", "17900", None, 70),
        ("Keccak-384", None, None, 65),
    ),
    128: (
        ("SHA-512", "1700", "raw-sha512", 85),
        ("Whirlpool", "6100", "whirlpool", 75),
        ("SHA3-512", "17600", None, 70),
        ("Keccak-512", None, None, 65),
        ("BLAKE2b-512", None, None, 60),
    ),
}

# Length of a padded Base64 string -> digest. An n-byte digest encodes to
# 4 * ceil(n / 3) characters, so the 28-byte SHA-224 digest is 40 characters.
_BASE64_TYPES = {
    24: ("MD5 (Base64)", None, None, 75),
    28: ("SHA-1 (Base64)", None, None, 75),
    40: ("SHA-224 (Base64)", None, None, 75),
    44: ("SHA-256 (Base64)", None, None, 75),
    64: ("SHA-384 (Base64)", None, None, 75),
    88: ("SHA-512 (Base64)", None, None, 75),
}


def _identify_modular_crypt(hash_string):
    """Classify a '$'-prefixed string by the identifier between its first two '$'."""
    scheme, terminator, _ = hash_string[1:].partition('$')
    if terminator:
        if scheme in _BCRYPT_SCHEMES:
            return (f"bcrypt ({scheme})", "3200", "bcrypt", 95)
        entry = _MODULAR_CRYPT_TYPES.get(scheme)
        if entry is not None:
            return entry
    return ("Unknown Modular Crypt Format", None, None, 30)


def _identify_braced_scheme(hash_string):
    """Classify a '{scheme}payload' string (LDAP and AIX); [] if the scheme is unknown."""
    scheme, closing, payload = hash_string[1:].partition('}')
    if not closing:
        return []
    scheme = scheme.lower()
    # "{smd5}" is shared by two formats. AIX stores "salt$hash" after the tag,
    # while the LDAP form carries Base64, whose alphabet has no '$'.
    if scheme == 'smd5' and '$' in payload:
        return [("AIX {smd5}", "6300", None, 90)]
    entry = _BRACED_SCHEMES.get(scheme)
    return [entry] if entry is not None else []


def _identify_colon_separated(parts):
    """Classify a record already split on ':' by field count and field lengths."""
    if len(parts) >= 5:
        return [
            ("NetNTLMv2", "5600", "netntlmv2", 85),
            ("NetNTLMv1", "5500", "netntlm", 75),
        ]
    if len(parts) == 2:
        first, second = parts
        if len(first) == 32 and len(second) == 32:
            return [("LM:NTLM", "1000", "nt", 90)]
        if len(second) == 32:
            return [("NTLM (with username)", "1000", "nt", 80)]
        if len(second) == 40:
            return [("SHA-1 (with salt/username)", "110", None, 70)]
    return []


def _identify_raw_digest(hash_string):
    """Classify a bare hex or Base64 digest by its length; [] when nothing fits."""
    length = len(hash_string)
    # Hex takes precedence: every even-length hex string is also valid Base64.
    if re.match(r'^[a-fA-F0-9]+$', hash_string):
        return list(_RAW_HEX_TYPES.get(length, ()))
    if length % 4 == 0 and re.match(r'^[A-Za-z0-9+/]+=*$', hash_string):
        entry = _BASE64_TYPES.get(length)
        if entry is not None:
            return [entry]
    return []
async def handle_command(room, message, bot, prefix, config):
    """
    Function to handle the !hashid command.

    With no argument the usage text is sent. Otherwise the arguments are
    joined with single spaces, passed to identify_hash, and the candidates
    are reported in descending order of confidence. Any exception raised
    while identifying or replying is logged and reported to the room.

    Args:
        room (Room): The Matrix room where the command was invoked.
        message (RoomMessage): The message object containing the command.
        bot (Bot): The bot object.
        prefix (str): The command prefix.
        config (dict): Configuration parameters (not used by this command).
    Returns:
        None
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("hashid")):
        return

    logging.info("Received !hashid command")
    args = match.args()
    if len(args) < 1:
        usage_msg = """🔐 Hash Identifier Usage
Usage: !hashid <hash>
Examples:
• !hashid 5f4dcc3b5aa765d61d8327deb882cf99
• !hashid 5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8
• !hashid $6$rounds=5000$salt$hash...
• !hashid $y$j9T$... (yescrypt from /etc/shadow)
Supported Hash Types:
• Modern: yescrypt, scrypt, Argon2, bcrypt
• Unix Crypt: SHA-512 Crypt, SHA-256 Crypt, MD5 Crypt
• Raw Hashes: MD5, SHA-1/224/256/384/512, SHA-3, NTLM, LM
• Database: MySQL, PostgreSQL, Oracle, MSSQL
• CMS: Wordpress, phpBB3, Drupal, Django
• LDAP: SSHA, SMD5, and various LDAP formats
• Network: NetNTLMv1/v2, Kerberos
• Exotic: Whirlpool, RIPEMD, BLAKE2, Keccak, GOST
"""
        await bot.api.send_markdown_message(room.room_id, usage_msg)
        return

    hash_input = ' '.join(args)
    try:
        # Identify the hash
        identified = identify_hash(hash_input)
        if not identified:
            await bot.api.send_text_message(
                room.room_id,
                "Could not identify hash type. Please verify the hash format."
            )
            return
        # Sort by confidence (highest first)
        identified = sorted(identified, key=lambda x: x[3], reverse=True)
        response = _format_hashid_response(hash_input, identified)
        await bot.api.send_markdown_message(room.room_id, response)
        logging.info(
            "Identified hash types: %s",
            ', '.join(f'{h[0]} ({h[3]}%)' for h in identified),
        )
    except Exception as e:
        # Command boundary: report the failure to the room instead of
        # letting it propagate into the bot's event loop.
        await bot.api.send_text_message(
            room.room_id,
            f"Error identifying hash: {str(e)}"
        )
        logging.error(f"Error in hashid command: {e}", exc_info=True)


# (minimum confidence, emoji, label), checked from the highest band down.
_CONFIDENCE_BANDS = (
    (90, "🟢", "Very High"),
    (80, "🟡", "High"),
    (60, "🟠", "Medium"),
)


def _confidence_band(confidence):
    """Return the (emoji, label) pair for a confidence percentage."""
    for minimum, emoji, label in _CONFIDENCE_BANDS:
        if confidence >= minimum:
            return emoji, label
    return "🔴", "Low"


def _format_hashid_response(hash_input, identified):
    """Build the reply text for a non-empty candidate list sorted by confidence."""
    hash_preview = hash_input[:60] + "..." if len(hash_input) > 60 else hash_input
    top_confidence = identified[0][3]
    confidence_emoji, confidence_label = _confidence_band(top_confidence)

    # The original literals were split across physical lines, which is a
    # SyntaxError for single-line string literals. Each piece keeps its visible
    # text and ends with the blank line the split implied.
    # NOTE(review): an earlier comment described this as being built "inside
    # collapsible details", but no such markup is present - confirm whether
    # wrapper tags were lost.
    pieces = [
        "🔐 Hash Identification Results\n\n",
        "\n\n",
        f"Input: {hash_preview}\n\n",
        f"Length: {len(hash_input)} characters\n\n",
        f"Overall Confidence: {confidence_emoji} {confidence_label} ({top_confidence}%)\n\n",
        "\n\n",
        f"Possible Hash Types ({len(identified)}):\n\n",
    ]

    for idx, (hash_type, hashcat_mode, john_format, confidence) in enumerate(identified, 1):
        conf_emoji, _ = _confidence_band(confidence)
        pieces.append(f"  {idx}. {hash_type} {conf_emoji} {confidence}%\n\n")
        tools = []
        if hashcat_mode:
            tools.append(f"Hashcat: -m {hashcat_mode}")
        if john_format:
            tools.append(f"John: --format={john_format}")
        if tools:
            pieces.append(f"     {' | '.join(tools)}\n\n")
        pieces.append("\n\n")

    # Add useful tips
    if len(identified) == 1 and identified[0][0] not in ["Unknown", "Unknown Modular Crypt Format"]:
        pieces.append("\n💡 Single match with high confidence\n\n")
    elif len(identified) > 5:
        pieces.append("\nℹ️ Multiple possibilities - context may help narrow it down\n\n")

    # Add legend
    pieces.append("\n\n")
    pieces.append("Confidence Legend:\n\n")
    pieces.append("🟢 Very High (90-100%) | 🟡 High (80-89%) | 🟠 Medium (60-79%) | 🔴 Low (0-59%)\n\n")
    pieces.append(" ")
    return "".join(pieces)