Unsafe Padding Character Issue in the "b85decode" Function #136932

Closed as not planned

Closed as not planned

Unsafe Padding Character Issue in the "b85decode" Function #136932

Labels

opened

on Jul 21, 2025

Bug report

Bug description:

The vulnerable_b85decode function uses ~ as a padding character when processing Base85 decoding (to pad the length of the encoded string to a multiple of 5). However, ~ itself is a valid encoding character in the Base85 alphabet (not a dedicated padding character). This causes confusion between the padding logic and the parsing of valid data, thereby introducing security vulnerabilities.

POC code:

import base64
import struct

# Original vulnerable b85decode implementation
def vulnerable_b85decode(b):
    """Decode the Base85-encoded bytes *b* and return the decoded bytes.

    The input is processed in groups of five characters, each of which is
    read as a base-85 number and packed into four big-endian bytes.  When
    ``len(b)`` is not a multiple of five, the input is padded with ``~``
    (the highest digit, value 84) and the same number of bytes is removed
    from the end of the result.

    Args:
        b: A bytes-like sequence of Base85 characters.

    Returns:
        The decoded data as ``bytes``.

    Raises:
        ValueError: If *b* contains a byte outside the Base85 alphabet, or
            if a five-character group encodes a value that does not fit in
            32 bits.
    """
    alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")

    # Byte value -> digit value; None marks bytes outside the alphabet.
    digit_of = [None] * 256
    for value, char in enumerate(alphabet):
        digit_of[char] = value

    # Pad to a whole number of 5-character groups with the highest digit.
    padding = (-len(b)) % 5
    b = b + b'~' * padding

    pack_u32 = struct.Struct('!I').pack
    out = []

    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + digit_of[c]
        except TypeError:
            # Adding None means some byte was not in the alphabet; find it
            # so the error can name the character and its position.
            for j, c in enumerate(chunk):
                if digit_of[c] is None:
                    # "from None" hides the internal TypeError from callers.
                    raise ValueError(
                        f'Invalid character {chr(c)} at position {i+j}'
                    ) from None
            raise
        try:
            out.append(pack_u32(acc))
        except struct.error:
            # The group encodes a value larger than 2**32 - 1.
            raise ValueError(
                f'Base85 overflow at block starting position {i}'
            ) from None

    result = b''.join(out)
    if padding:
        # Drop the bytes that only exist because of the padding characters.
        result = result[:-padding]
    return result

# Attack demonstration
def demonstrate_attack():
    """Print how ``vulnerable_b85decode`` reacts to inputs ending in ``~``.

    Runs four small scenarios and prints each decoded result (or the
    resulting error) to stdout.  Takes no arguments and returns ``None``.
    """
    print("Base85 Padding Character Attack Demonstration\n" + "="*40)

    # 1. Basic tampering attack
    print("\n1. Basic Tampering Attack:")
    original = b"Hello"
    # Derive the encoding from the plaintext so the baseline really
    # round-trips to b'Hello'.
    encoded = base64.b85encode(original)
    decoded = vulnerable_b85decode(encoded)
    print(f"Original decoding: {decoded}")  # b'Hello'

    # The 7-character encoding is padded internally with three '~' and the
    # three resulting bytes are dropped.  Appending the '~~~' explicitly
    # makes the length a multiple of 5, so nothing is dropped and three
    # extra bytes follow b'Hello' in the output.
    malicious = encoded + b'~~~'
    malicious_decoded = vulnerable_b85decode(malicious)
    print(f"Tampered decoding: {malicious_decoded}")

    # 2. Data injection attack
    print("\n2. Data Injection Attack:")
    # A 5-character group appended to the 7-character encoding.  The
    # decoder re-splits the combined input on 5-character boundaries.
    evil_payload = b'@<)q~'

    injected = encoded + evil_payload
    injected_decoded = vulnerable_b85decode(injected)
    print(f"Injected decoding: {injected_decoded}")

    # 3. Boundary condition attack
    print("\n3. Boundary Condition Attack:")
    # Length 4: the decoder pads with one '~' and drops one byte,
    # producing 3 bytes.
    boundary = b'@<)q'
    boundary_decoded = vulnerable_b85decode(boundary)
    print(f"Boundary input decoding: {boundary_decoded}")

    # Same group with the '~' supplied explicitly: no padding is added and
    # nothing is dropped, so the output is the same 3 bytes plus one more.
    malicious_boundary = b'@<)q' + b'~'
    malicious_boundary_decoded = vulnerable_b85decode(malicious_boundary)
    print(f"Malicious boundary input decoding: {malicious_boundary_decoded}")

    # 4. Denial of Service attack
    print("\n4. Denial of Service Attack:")
    try:
        # Five '~' encode 85**5 - 1, which does not fit in 32 bits, so the
        # decoder rejects the group with ValueError.
        dos_attack = b'~~~~~'
        vulnerable_b85decode(dos_attack)
    except ValueError as e:
        print(f"DoS attack successful: {e}")

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demonstrate_attack()

CPython versions tested on:

3.14

Operating systems tested on:

Windows

Metadata

Assignees

No one assigned

Labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests