# FunguyBot/plugins/hashid.py

"""
This plugin provides a command to identify hash types using comprehensive pattern matching.
"""

import html
import logging
import re

import simplematrixbotlib as botlib

# Every candidate is a tuple: (hash_type, hashcat_mode, john_format, confidence).

# Fallback entry used whenever no rule matches.
_UNKNOWN_HASH = ("Unknown", None, None, 0)

# Fallback for "$..." strings whose prefix is not in the table below.
_UNKNOWN_MODULAR_CRYPT = ("Unknown Modular Crypt Format", None, None, 30)

# Literal, case-sensitive "$id$" prefixes. Every prefix ends in "$", so no
# prefix can shadow another and the order is irrelevant.
# NOTE(review): the Argon2 rows list hashcat mode 10900, the same mode this
# table gives for PBKDF2-HMAC-SHA256 -- confirm against the hashcat mode list.
_MODULAR_CRYPT_PREFIXES = (
    ("$y$", ("yescrypt", None, "yescrypt", 95)),
    ("$7$", ("scrypt", "8900", "scrypt", 95)),
    ("$argon2i$", ("Argon2i", "10900", "argon2", 95)),
    ("$argon2d$", ("Argon2d", None, "argon2", 95)),
    ("$argon2id$", ("Argon2id", "10900", "argon2", 95)),
    ("$6$", ("SHA-512 Crypt (Unix)", "1800", "sha512crypt", 95)),
    ("$5$", ("SHA-256 Crypt (Unix)", "7400", "sha256crypt", 95)),
    ("$1$", ("MD5 Crypt (Unix)", "500", "md5crypt", 95)),
    ("$apr1$", ("Apache MD5 (apr1)", "1600", "md5crypt", 95)),
    ("$H$", ("phpBB3", "400", "phpass", 90)),
    ("$P$", ("Wordpress", "400", "phpass", 90)),
    ("$S$", ("Drupal 7+", "7900", "drupal7", 90)),
    ("$wbb3$", ("WBB3 (Woltlab)", None, None, 85)),
    ("$pbkdf2-sha256$", ("PBKDF2-HMAC-SHA256", "10900", "pbkdf2-hmac-sha256", 90)),
    ("$pbkdf2-sha512$", ("PBKDF2-HMAC-SHA512", None, "pbkdf2-hmac-sha512", 90)),
)

# bcrypt is handled apart from the table because the variant is echoed in the label.
_BCRYPT_RE = re.compile(r'\$(2[abxy]?)\$')

# "{scheme}" tags, compared case-insensitively (keys are lower-case).
# "{smd5}" is resolved separately because two formats share that tag.
_BRACED_SCHEMES = (
    ("{sha}", ("LDAP SHA-1", "101", "nsldap", 90)),
    ("{ssha}", ("LDAP SSHA (Salted SHA-1)", "111", "nsldaps", 90)),
    ("{md5}", ("LDAP MD5", "3210", None, 90)),
    ("{crypt}", ("LDAP CRYPT", None, None, 85)),
    ("{ssha256}", ("AIX {ssha256}", "6700", None, 90)),
    ("{ssha512}", ("AIX {ssha512}", "6800", None, 90)),
)

# Formats recognised by the shape of the whole string. They are tested before
# the generic colon handling because the Oracle shapes contain a colon.
_FIXED_FORMATS = (
    (re.compile(r'md5[0-9a-fA-F]{32}'),
     ("PostgreSQL MD5", "3100", "postgres", 90)),
    (re.compile(r'[0-9a-fA-F]{16}:[0-9a-fA-F]{16}'),
     ("Oracle 11g", "112", "oracle11", 90)),
    (re.compile(r'[sS]:[0-9a-fA-F]{60}'),
     ("Oracle 12c/18c", "12300", "oracle12c", 90)),
    (re.compile(r'\*[0-9a-fA-F]{40}'),
     ("MySQL 4.1/5.x", "300", "mysql-sha1", 95)),
    (re.compile(r'0[xX]0100[0-9a-fA-F]{8}[0-9a-fA-F]{40}'),
     ("MSSQL 2000", "131", "mssql", 90)),
    (re.compile(r'0[xX]0200[0-9a-fA-F]{8}[0-9a-fA-F]{128}'),
     ("MSSQL 2012/2014", "1731", "mssql12", 90)),
)

_HEX_RE = re.compile(r'[0-9a-fA-F]+')
_BASE64_RE = re.compile(r'[A-Za-z0-9+/]+={0,2}')

# Candidates for a bare hexadecimal string, keyed by its character count.
_HEX_BY_LENGTH = {
    16: (
        ("MySQL < 4.1", "200", "mysql", 85),
        ("Half MD5", None, None, 60),
    ),
    32: (
        ("MD5", "0", "raw-md5", 80),
        ("MD4", "900", "raw-md4", 70),
        ("NTLM", "1000", "nt", 75),
        ("LM", "3000", "lm", 60),
        ("RAdmin v2.x", "9900", None, 50),
        ("Snefru-128", None, None, 40),
        ("HMAC-MD5 (key = $pass)", "50", None, 50),
    ),
    40: (
        ("SHA-1", "100", "raw-sha1", 85),
        ("RIPEMD-160", "6000", "ripemd-160", 65),
        ("Tiger-160", None, None, 50),
        ("Haval-160", None, None, 45),
        ("HMAC-SHA1 (key = $pass)", "150", None, 55),
    ),
    48: (
        ("Tiger-192", None, None, 70),
        ("Haval-192", None, None, 65),
    ),
    56: (
        ("SHA-224", "1300", "raw-sha224", 85),
        ("Haval-224", None, None, 60),
    ),
    64: (
        ("SHA-256", "1400", "raw-sha256", 85),
        ("RIPEMD-256", None, None, 60),
        ("SHA3-256", "17400", "raw-sha3", 70),
        ("Keccak-256", "17800", "raw-keccak-256", 70),
        ("Haval-256", None, None, 50),
        ("GOST R 34.11-94", "6900", None, 55),
        ("BLAKE2b-256", None, None, 60),
    ),
    80: (
        ("RIPEMD-320", None, None, 80),
    ),
    96: (
        ("SHA-384", "10800", "raw-sha384", 85),
        ("SHA3-384", "17900", None, 70),
        ("Keccak-384", None, None, 65),
    ),
    128: (
        ("SHA-512", "1700", "raw-sha512", 85),
        ("Whirlpool", "6100", "whirlpool", 75),
        ("SHA3-512", "17600", None, 70),
        ("Keccak-512", None, None, 65),
        ("BLAKE2b-512", None, None, 60),
    ),
}

# Candidates for a padded Base64 string, keyed by its character count
# (4 * ceil(digest_bytes / 3)); a 28-byte SHA-224 digest encodes to 40 characters.
_BASE64_BY_LENGTH = {
    24: ("MD5 (Base64)", None, None, 75),
    28: ("SHA-1 (Base64)", None, None, 75),
    40: ("SHA-224 (Base64)", None, None, 75),
    44: ("SHA-256 (Base64)", None, None, 75),
    64: ("SHA-384 (Base64)", None, None, 75),
    88: ("SHA-512 (Base64)", None, None, 75),
}


def _identify_modular_crypt(hash_string):
    """Return the single candidate for a string that starts with '$'."""
    for prefix, entry in _MODULAR_CRYPT_PREFIXES:
        if hash_string.startswith(prefix):
            return [entry]

    bcrypt = _BCRYPT_RE.match(hash_string)
    if bcrypt:
        return [(f"bcrypt ({bcrypt.group(1)})", "3200", "bcrypt", 95)]

    return [_UNKNOWN_MODULAR_CRYPT]


def _identify_braced_scheme(hash_string):
    """Return candidates for a '{scheme}...' string, or [] when the tag is unknown."""
    lowered = hash_string.lower()

    if lowered.startswith('{smd5}'):
        # '$' is not part of the Base64 alphabet, so its presence rules out a
        # Base64 LDAP payload. NOTE(review): AIX {smd5} values are expected to
        # look like "{smd5}salt$digest" -- confirm against sample hashes.
        if '$' in hash_string:
            return [("AIX {smd5}", "6300", None, 90)]
        return [("LDAP SMD5 (Salted MD5)", "3211", None, 90)]

    for tag, entry in _BRACED_SCHEMES:
        if lowered.startswith(tag):
            return [entry]
    return []


def _identify_colon_separated(hash_string):
    """Return candidates for a generic 'a:b[:c...]' string, or [] when none fit."""
    parts = hash_string.split(':')

    if len(parts) >= 5:
        return [
            ("NetNTLMv2", "5600", "netntlmv2", 85),
            ("NetNTLMv1", "5500", "netntlm", 75),
        ]
    if len(parts) != 2:
        return []

    first, second = parts
    if len(first) == 32 and len(second) == 32:
        return [("LM:NTLM", "1000", "nt", 90)]
    if len(second) == 32:
        return [("NTLM (with username)", "1000", "nt", 80)]
    if len(second) == 40:
        return [("SHA-1 (with salt/username)", "110", None, 70)]
    return []


def identify_hash(hash_string):
    """
    Identify the hash type based on comprehensive pattern matching.

    Rules are tried from most to least specific: '$' prefixes, '{scheme}' tags,
    the Django prefix, whole-string database formats, generic colon-separated
    values, and finally bare hexadecimal / Base64 strings classified by length.

    Args:
        hash_string (str): The hash string to identify; surrounding
            whitespace is ignored.

    Returns:
        list: List of tuples (hash_type, hashcat_mode, john_format, confidence).
            The list is never empty and never None; when nothing matches it
            holds the single entry ("Unknown", None, None, 0). The list is
            freshly built on every call, so callers may modify it.
    """
    hash_string = hash_string.strip()

    if hash_string.startswith('$'):
        return _identify_modular_crypt(hash_string)

    if hash_string.startswith('{'):
        return _identify_braced_scheme(hash_string) or [_UNKNOWN_HASH]

    # Django stores "pbkdf2_sha256$..." without a leading '$'.
    if hash_string.startswith('pbkdf2_sha256$'):
        return [("Django PBKDF2-SHA256", "10000", "django", 90)]

    for pattern, entry in _FIXED_FORMATS:
        if pattern.fullmatch(hash_string):
            return [entry]

    if ':' in hash_string:
        return _identify_colon_separated(hash_string) or [_UNKNOWN_HASH]

    length = len(hash_string)
    if _HEX_RE.fullmatch(hash_string):
        # A pure-hex string is only ever classified as hex, even when its
        # length would also be valid Base64.
        candidates = list(_HEX_BY_LENGTH.get(length, ()))
    elif length % 4 == 0 and _BASE64_RE.fullmatch(hash_string):
        entry = _BASE64_BY_LENGTH.get(length)
        candidates = [entry] if entry else []
    else:
        candidates = []

    return candidates or [_UNKNOWN_HASH]


def _confidence_indicator(confidence):
    """Map a confidence percentage to the (emoji, label) pair shown in replies."""
    if confidence >= 90:
        return "🟢", "Very High"
    if confidence >= 80:
        return "🟡", "High"
    if confidence >= 60:
        return "🟠", "Medium"
    return "🔴", "Low"


def _format_hashid_response(hash_input, identified):
    """
    Render identification results as the collapsible HTML reply.

    Args:
        hash_input (str): The hash text as supplied by the user.
        identified (list): Non-empty list of
            (hash_type, hashcat_mode, john_format, confidence) tuples,
            already sorted with the highest confidence first.

    Returns:
        str: HTML fragment wrapped in a <details> element.
    """
    # Truncate first, then escape, so an HTML entity is never cut in half.
    preview = hash_input[:60] + "..." if len(hash_input) > 60 else hash_input
    preview = html.escape(preview)

    top_confidence = identified[0][3]
    top_emoji, top_label = _confidence_indicator(top_confidence)

    chunks = [
        "<details><summary><strong>🔐 Hash Identification Results</strong></summary>\n",
        "<br>\n",
        f"<strong>Input:</strong> <code>{preview}</code><br>\n",
        f"<strong>Length:</strong> {len(hash_input)} characters<br>\n",
        f"<strong>Overall Confidence:</strong> {top_emoji} {top_label} ({top_confidence}%)<br>\n",
        "<br>\n",
        f"<strong>Possible Hash Types ({len(identified)}):</strong><br>\n",
    ]

    for idx, (hash_type, hashcat_mode, john_format, confidence) in enumerate(identified, 1):
        conf_emoji, _ = _confidence_indicator(confidence)
        # Labels such as "MySQL < 4.1" contain characters that are special in HTML.
        chunks.append(
            f"  <strong>{idx}. {html.escape(hash_type)}</strong> {conf_emoji} {confidence}%<br>\n"
        )

        tools = []
        if hashcat_mode:
            tools.append(f"Hashcat: <code>-m {hashcat_mode}</code>")
        if john_format:
            tools.append(f"John: <code>--format={john_format}</code>")
        if tools:
            chunks.append(f"     {' | '.join(tools)}<br>\n")

        chunks.append("<br>\n")

    # Add useful tips
    if len(identified) == 1 and identified[0][0] not in ("Unknown", "Unknown Modular Crypt Format"):
        chunks.append("<br><strong>💡 Single match with high confidence</strong><br>\n")
    elif len(identified) > 5:
        chunks.append("<br><em>ℹ️ Multiple possibilities - context may help narrow it down</em><br>\n")

    # Add legend
    chunks.append("<br>\n")
    chunks.append("<strong>Confidence Legend:</strong><br>\n")
    chunks.append("🟢 Very High (90-100%) | 🟡 High (80-89%) | 🟠 Medium (60-79%) | 🔴 Low (0-59%)<br>\n")
    chunks.append("</details>")

    return "".join(chunks)


async def handle_command(room, message, bot, prefix, config):
    """
    Function to handle the !hashid command.

    With no argument a usage message is sent. Otherwise the arguments are
    joined with spaces, passed to identify_hash(), and the candidates are sent
    back sorted by confidence. User-supplied text is HTML-escaped before it is
    placed in the reply.

    Args:
        room (Room): The Matrix room where the command was invoked.
        message (RoomMessage): The message object containing the command.
        bot (Bot): The bot object.
        prefix (str): The command prefix.
        config (dict): Configuration parameters (not used by this command).

    Returns:
        None
    """
    match = botlib.MessageMatch(room, message, bot, prefix)
    if not (match.is_not_from_this_bot() and match.prefix() and match.command("hashid")):
        return

    logging.info("Received !hashid command")

    args = match.args()

    if not args:
        usage_msg = """<strong>🔐 Hash Identifier Usage</strong>

<strong>Usage:</strong> <code>!hashid &lt;hash&gt;</code>

<strong>Examples:</strong>
• <code>!hashid 5f4dcc3b5aa765d61d8327deb882cf99</code>
• <code>!hashid 5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8</code>
• <code>!hashid $6$rounds=5000$salt$hash...</code>
• <code>!hashid $y$j9T$...</code> (yescrypt from /etc/shadow)

<strong>Supported Hash Types:</strong>
• <strong>Modern:</strong> yescrypt, scrypt, Argon2, bcrypt
• <strong>Unix Crypt:</strong> SHA-512 Crypt, SHA-256 Crypt, MD5 Crypt
• <strong>Raw Hashes:</strong> MD5, SHA-1/224/256/384/512, SHA-3, NTLM, LM
• <strong>Database:</strong> MySQL, PostgreSQL, Oracle, MSSQL
• <strong>CMS:</strong> Wordpress, phpBB3, Drupal, Django
• <strong>LDAP:</strong> SSHA, SMD5, and various LDAP formats
• <strong>Network:</strong> NetNTLMv1/v2
• <strong>Exotic:</strong> Whirlpool, RIPEMD, BLAKE2, Keccak, GOST
"""
        await bot.api.send_markdown_message(room.room_id, usage_msg)
        return

    hash_input = ' '.join(args)

    # Broad catch is deliberate: this is the command boundary, and a failure
    # should be reported to the room and logged rather than crash the bot.
    try:
        # Identify the hash
        identified = identify_hash(hash_input)

        if not identified:
            await bot.api.send_text_message(
                room.room_id,
                "Could not identify hash type. Please verify the hash format."
            )
            return

        # Sort by confidence (highest first)
        identified = sorted(identified, key=lambda entry: entry[3], reverse=True)

        response = _format_hashid_response(hash_input, identified)
        await bot.api.send_markdown_message(room.room_id, response)
        logging.info(
            "Identified hash types: %s",
            ", ".join(f"{entry[0]} ({entry[3]}%)" for entry in identified),
        )

    except Exception as e:
        await bot.api.send_text_message(
            room.room_id,
            f"Error identifying hash: {str(e)}"
        )
        logging.error(f"Error in hashid command: {e}", exc_info=True)