Source code for valid8r.core.parsers

"""String parsing functions with Maybe monad error handling."""

from __future__ import annotations

import re
from datetime import (
    date,
    datetime,
    timedelta,
)
from decimal import (
    Decimal,
    InvalidOperation,
)
from enum import Enum
from functools import wraps
from typing import (
    TYPE_CHECKING,
    ParamSpec,
    TypeVar,
    cast,
    overload,
)
from uuid import UUID

from valid8r.core.maybe import (
    Failure,
    Maybe,
    Success,
)

try:
    import uuid_utils as uuidu
except Exception:  # noqa: BLE001
    uuidu = None  # type: ignore[assignment]

try:
    from email_validator import (
        EmailNotValidError,
        validate_email,
    )

    HAS_EMAIL_VALIDATOR = True
except ImportError:
    HAS_EMAIL_VALIDATOR = False
    EmailNotValidError = None  # type: ignore[assignment,misc]
    validate_email = None  # type: ignore[assignment]

import base64
import binascii
import json
from dataclasses import dataclass
from ipaddress import (
    IPv4Address,
    IPv4Network,
    IPv6Address,
    IPv6Network,
    ip_address,
    ip_network,
)
from pathlib import Path
from urllib.parse import urlsplit

if TYPE_CHECKING:
    from collections.abc import (
        Callable,
        Iterable,
    )

# Generic type variables used in the parser signatures of this module.
# T: result type of a single-value or element parser (e.g. list/set elements).
T = TypeVar('T')
# K / V: key and value types produced by the dictionary parsers.
K = TypeVar('K')
V = TypeVar('V')
# NOTE(review): P is not referenced in the portion of the module visible here.
P = ParamSpec('P')
# E: constrained to Enum subclasses; used by the enum lookup helpers.
E = TypeVar('E', bound=Enum)

# Length of a 'YYYY-MM-DD' string; parse_date only attempts the default ISO
# path when the stripped input has exactly this many characters.
ISO_DATE_LENGTH = 10

# Compiled regex patterns for phone parsing (cached for performance)
# Matches a trailing extension: digits after ',' or ';', or after whitespace
# followed by 'x', 'ext', 'ext.' or 'extension' (case-insensitive).
_PHONE_EXTENSION_PATTERN = re.compile(r'\s*[,;]\s*(\d+)$|\s+(?:x|ext\.?|extension)\s*(\d+)$', re.IGNORECASE)
# Matches text made only of digits, whitespace, parentheses, '-', '+' and '.'.
# NOTE(review): re.MULTILINE makes ^ and $ anchor at every line boundary, so
# with search() a single well-formed line inside multi-line input would match
# -- confirm this is intended by the phone parser (not visible here).
_PHONE_VALID_CHARS_PATTERN = re.compile(r'^[\d\s()\-+.]+$', re.MULTILINE)
# Matches any single non-digit character; presumably used with sub() to keep
# digits only -- verify against the phone parser.
_PHONE_DIGIT_EXTRACTION_PATTERN = re.compile(r'\D')


def parse_str(
    input_value: object,
    error_message: str | None = None,
    *,
    strip: bool = True,
    allow_empty: bool = True,
) -> Maybe[str]:
    """Check that the input is a ``str`` and optionally normalise it.

    This is a type-level check (``isinstance``); content rules such as
    length or pattern constraints belong to validators chained afterwards.

    Args:
        input_value: Any object; only ``str`` instances succeed.
        error_message: Optional message that replaces every generated
            failure message.
        strip: When True (default) leading/trailing whitespace is removed
            before the value is returned.
        allow_empty: When False, an empty string (after optional stripping)
            is rejected.

    Returns:
        Maybe[str]: ``Success`` holding the (possibly stripped) string, or
        ``Failure`` with a descriptive message.

    Examples:
        >>> parse_str("hello")
        Success('hello')
        >>> parse_str("  hello  ")
        Success('hello')
        >>> parse_str("", allow_empty=False).is_failure()
        True
        >>> parse_str(42).is_failure()
        True
    """
    # None gets its own message so callers can tell "missing" from "wrong type".
    if input_value is None:
        return Failure(error_message or 'Value cannot be None')

    if not isinstance(input_value, str):
        type_complaint = f'Expected string, got {type(input_value).__name__}'
        return Failure(error_message or type_complaint)

    text = input_value
    if strip:
        text = text.strip()

    if text == '' and not allow_empty:
        return Failure(error_message or 'String cannot be empty')

    return Success(text)
def parse_int(input_value: str, error_message: str | None = None) -> Maybe[int]:
    """Parse a string to an integer.

    Accepts plain integer literals and whole numbers written in decimal or
    float notation (e.g. ``"42.0"``). Values containing a ``.`` are checked
    exactly with ``Decimal`` so that large inputs are neither silently
    rounded (``"9007199254740993.0"``) nor mistaken for integers because
    their fractional part is lost in a binary float
    (``"9007199254740993.5"``).

    Args:
        input_value: String to parse (leading/trailing whitespace is stripped).
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[int]: Success(int) if parsing succeeds, Failure(str) with an
        error message otherwise.

    Examples:
        >>> parse_int("42")
        Success(42)
        >>> parse_int(" -17 ")
        Success(-17)
        >>> parse_int("42.0")
        Success(42)
        >>> parse_int("9007199254740993.0")
        Success(9007199254740993)
        >>> parse_int("42.5").is_failure()
        True
        >>> parse_int("not a number").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    invalid_message = error_message or 'Input must be a valid integer'
    cleaned_input = input_value.strip()

    try:
        if '.' not in cleaned_input:
            return Maybe.success(int(cleaned_input))

        # float() acts as the grammar and magnitude gate: it raises ValueError
        # for malformed text and yields inf (never "integer") on overflow,
        # which bounds the size of the int built below.
        approximate = float(cleaned_input)
        if not approximate.is_integer():
            return Maybe.failure(invalid_message)

        # The float may have rounded away a fractional part or low-order
        # digits, so the actual value is taken from an exact Decimal.
        exact = Decimal(cleaned_input)
        if exact != exact.to_integral_value():
            return Maybe.failure(invalid_message)
        return Maybe.success(int(exact))
    except (ValueError, InvalidOperation):
        return Maybe.failure(invalid_message)
def parse_float(input_value: str, error_message: str | None = None) -> Maybe[float]:
    """Convert a numeric string into a ``float``.

    Surrounding whitespace is ignored and anything ``float()`` understands
    (including scientific notation) is accepted.

    Args:
        input_value: String to parse (leading/trailing whitespace is stripped).
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[float]: Success(float) on success, Failure(str) otherwise.

    Examples:
        >>> parse_float("3.14")
        Success(3.14)
        >>> parse_float(" -2.5 ")
        Success(-2.5)
        >>> parse_float("1e-3")
        Success(0.001)
        >>> parse_float("not a number").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    trimmed = input_value.strip()
    try:
        number = float(trimmed)
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid number')
    return Maybe.success(number)
def parse_bool(input_value: str, error_message: str | None = None) -> Maybe[bool]:
    """Interpret a string as a boolean.

    Matching is case-insensitive and ignores surrounding whitespace.

    Recognised true spellings: 'true', 't', 'yes', 'y', '1'
    Recognised false spellings: 'false', 'f', 'no', 'n', '0'

    Args:
        input_value: String to parse.
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[bool]: Success(bool) for a recognised spelling, Failure(str)
        otherwise.

    Examples:
        >>> parse_bool("true")
        Success(True)
        >>> parse_bool("YES")
        Success(True)
        >>> parse_bool(" 0 ")
        Success(False)
        >>> parse_bool("maybe").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    # One lookup table instead of two membership tests.
    spellings = {
        'true': True,
        't': True,
        'yes': True,
        'y': True,
        '1': True,
        'false': False,
        'f': False,
        'no': False,
        'n': False,
        '0': False,
    }
    verdict = spellings.get(input_value.strip().lower())
    if verdict is None:
        return Maybe.failure(error_message or 'Input must be a valid boolean')
    return Maybe.success(value=verdict)
def parse_date(input_value: str, date_format: str | None = None, error_message: str | None = None) -> Maybe[date]:
    """Convert a string into a ``date``.

    Without ``date_format`` only the strict ISO 8601 calendar form
    ``YYYY-MM-DD`` is accepted; any other layout must be requested
    explicitly through ``date_format``.

    Args:
        input_value: String to parse (leading/trailing whitespace is stripped).
        date_format: Optional ``strptime`` format (e.g. '%m/%d/%Y').
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[date]: Success(date) on success, Failure(str) otherwise.

    Examples:
        >>> parse_date("2025-01-15")
        Success(datetime.date(2025, 1, 15))
        >>> parse_date("01/15/2025", date_format="%m/%d/%Y")
        Success(datetime.date(2025, 1, 15))
        >>> parse_date("invalid").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    text = input_value.strip()
    try:
        if date_format:
            parsed = datetime.strptime(text, date_format)  # noqa: DTZ007
            return Maybe.success(parsed.date())

        # Require the dashed 10-character layout before delegating to
        # fromisoformat, so looser ISO variants are not accepted by default.
        dashed_iso = len(text) == ISO_DATE_LENGTH and text[4] == '-' and text[7] == '-'
        if not dashed_iso:
            return Maybe.failure(error_message or 'Input must be a valid date')
        return Maybe.success(date.fromisoformat(text))
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid date')
def parse_datetime(input_value: str | None, error_message: str | None = None) -> Maybe[datetime]:
    """Convert an ISO 8601 string into a timezone-aware ``datetime``.

    A timezone is mandatory: naive datetimes are rejected. Accepted forms
    include a ``Z`` suffix, explicit offsets such as ``+05:30`` / ``-08:00``
    and fractional seconds.

    Args:
        input_value: String to parse (leading/trailing whitespace is stripped).
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[datetime]: Success(datetime) when the text parses and carries
        timezone information, Failure(str) otherwise.

    Examples:
        >>> parse_datetime("2024-01-01T12:00:00Z")
        Success(datetime.datetime(2024, 1, 1, 12, 0, tzinfo=datetime.timezone.utc))
        >>> parse_datetime("2024-01-01T12:00:00").is_failure()
        True
    """
    # None and every other non-string are reported as "empty".
    if not isinstance(input_value, str):
        return Maybe.failure(error_message or 'Input must not be empty')

    trimmed = input_value.strip()
    if not trimmed:
        return Maybe.failure(error_message or 'Input must not be empty')

    # DoS guard: a real ISO datetime is far shorter than this limit. The
    # check is made on the raw (unstripped) input length.
    if len(input_value) > 100:
        return Maybe.failure(error_message or 'Input is too long')

    try:
        # fromisoformat before Python 3.11 does not understand the 'Z' suffix.
        parsed = datetime.fromisoformat(trimmed.replace('Z', '+00:00'))  # noqa: FURB162
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid ISO 8601 datetime')

    if parsed.tzinfo is None:
        return Maybe.failure(error_message or 'Datetime must include timezone information')
    return Maybe.success(parsed)
def parse_timedelta(input_value: str | None, error_message: str | None = None) -> Maybe[timedelta]:
    """Convert a duration string into a ``timedelta``.

    Supported notations:
        - Simple: '90m', '2h', '45s', '3d'
        - Combined: '1h 30m', '1d 2h 30m 45s', '1h30m'
        - ISO 8601 duration: 'PT1H30M', 'P1DT2H', 'PT45S'

    Units are d (days), h (hours), m (minutes) and s (seconds).

    Args:
        input_value: String to parse (leading/trailing whitespace is stripped).
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[timedelta]: Success(timedelta) on success, Failure(str)
        otherwise.

    Examples:
        >>> parse_timedelta("90m")
        Success(datetime.timedelta(seconds=5400))
        >>> parse_timedelta("PT1H30M")
        Success(datetime.timedelta(seconds=5400))
        >>> parse_timedelta("-90m").is_failure()
        True
    """
    # None and every other non-string are reported as "empty".
    if not isinstance(input_value, str):
        return Maybe.failure(error_message or 'Input must not be empty')

    text = input_value.strip()
    if text == '':
        return Maybe.failure(error_message or 'Input must not be empty')

    # DoS guard, applied to the raw (unstripped) input length.
    if len(input_value) > 200:
        return Maybe.failure(error_message or 'Input is too long')

    try:
        if text.startswith('-'):
            return Maybe.failure(error_message or 'Duration cannot be negative')

        # A leading 'P' selects the ISO 8601 grammar; everything else is
        # handed to the simple/combined parser.
        handler = _parse_iso_duration if text.startswith('P') else _parse_simple_duration
        return handler(text, error_message)
    except (ValueError, AttributeError):
        return Maybe.failure(error_message or 'Input must be a valid duration')
def _parse_iso_duration(s: str, error_message: str | None) -> Maybe[timedelta]:
    """Parse an ISO 8601 duration (internal helper).

    Grammar: ``P[nD][T[nH][nM][nS]]`` (case-insensitive; seconds may be
    fractional). At least one component must be present, so a bare ``P`` or
    ``PT`` is rejected instead of being read as a zero duration.

    Args:
        s: Candidate duration text, e.g. 'PT1H30M', 'P1DT2H', 'PT45S', 'P1D'.
        error_message: Optional message that replaces the default failure text.

    Returns:
        Maybe[timedelta]: Success with the duration, or Failure when the text
        does not match the grammar or the value is too large for ``timedelta``.
    """
    invalid_message = error_message or 'Input must be a valid duration'
    pattern = r'P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?)?'
    match = re.fullmatch(pattern, s, re.IGNORECASE)
    if match is None:
        return Maybe.failure(invalid_message)

    days_str, hours_str, minutes_str, seconds_str = match.groups()
    # Every group is optional in the pattern, so "no component at all" has to
    # be rejected explicitly.
    if days_str is None and hours_str is None and minutes_str is None and seconds_str is None:
        return Maybe.failure(invalid_message)

    try:
        td = timedelta(
            days=int(days_str) if days_str else 0,
            hours=int(hours_str) if hours_str else 0,
            minutes=int(minutes_str) if minutes_str else 0,
            seconds=float(seconds_str) if seconds_str else 0.0,
        )
    except OverflowError:
        # timedelta raises OverflowError for magnitudes it cannot represent.
        return Maybe.failure(invalid_message)
    return Maybe.success(td)


def _parse_simple_duration(s: str, error_message: str | None) -> Maybe[timedelta]:
    """Parse the simple/combined duration notation (internal helper).

    Accepts one or more ``<integer><unit>`` tokens where unit is one of
    d/h/m/s (case-insensitive), with optional whitespace between number and
    unit and between tokens: '90m', '1h 30m', '1h30m'. Repeated units are
    summed.

    The whole string must consist of such tokens. Text that merely contains
    a token (e.g. '1.5h', which would otherwise be read as '5h', or
    'abc 5m') is rejected.

    Args:
        s: Candidate duration text.
        error_message: Optional message that replaces the default failure text.

    Returns:
        Maybe[timedelta]: Success with the summed duration, or Failure when
        the text is malformed or the value is too large for ``timedelta``.
    """
    invalid_message = error_message or 'Input must be a valid duration'

    # Validate the full string first; findall alone would silently skip any
    # characters that are not part of a token.
    if re.fullmatch(r'\s*(?:\d+\s*[dhms]\s*)+', s, re.IGNORECASE) is None:
        return Maybe.failure(invalid_message)

    totals = {'d': 0, 'h': 0, 'm': 0, 's': 0}
    for value_str, unit in re.findall(r'(\d+)\s*([dhms])', s, re.IGNORECASE):
        totals[unit.lower()] += int(value_str)

    try:
        td = timedelta(days=totals['d'], hours=totals['h'], minutes=totals['m'], seconds=totals['s'])
    except OverflowError:
        # timedelta raises OverflowError for magnitudes it cannot represent.
        return Maybe.failure(invalid_message)
    return Maybe.success(td)
def parse_complex(input_value: str, error_message: str | None = None) -> Maybe[complex]:
    """Parse a string to a complex number.

    Accepts Python's ``j`` notation as well as the mathematical ``i``
    notation, an optional pair of surrounding parentheses, and whitespace
    around the ``+`` / ``-`` signs.

    Only a trailing ``i`` (the imaginary unit) is rewritten to ``j``, so
    spellings that merely contain the letter, such as ``inf`` or
    ``infinity``, are left intact.

    Args:
        input_value: String to parse (outer whitespace is stripped).
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[complex]: Success(complex) if parsing succeeds, Failure(str)
        with an error message otherwise.

    Examples:
        >>> parse_complex("3+4j")
        Success((3+4j))
        >>> parse_complex("3 + 4i")
        Success((3+4j))
        >>> parse_complex("(2-3j)")
        Success((2-3j))
        >>> parse_complex("5j")
        Success(5j)
        >>> parse_complex("invalid").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    text = input_value.strip()

    # Drop one pair of enclosing parentheses, e.g. "(2-3j)".
    if text.startswith('(') and text.endswith(')'):
        text = text[1:-1].strip()

    # Rewrite the imaginary unit only where it can legally appear: as the
    # last character before optional whitespace / closing parentheses.
    text = re.sub(r'[iI](?=[\s)]*$)', 'j', text)

    # complex() rejects whitespace around the sign ("3 + 4j"), so collapse it.
    text = re.sub(r'\s*([+-])\s*', r'\1', text)

    try:
        return Maybe.success(complex(text))
    except ValueError:
        return Maybe.failure(error_message or 'Input must be a valid complex number')
def parse_decimal(input_value: str, error_message: str | None = None) -> Maybe[Decimal]:
    """Convert a string into a ``Decimal``.

    ``Decimal`` keeps the value exactly as written, avoiding binary
    floating-point rounding, which makes it suitable for monetary amounts.

    Args:
        input_value: String representation of a decimal number (surrounding
            whitespace is stripped).
        error_message: Optional custom error message.

    Returns:
        Maybe[Decimal]: Success with the Decimal value or Failure with an
        error message.

    Examples:
        >>> parse_decimal("3.14159")
        Success(Decimal('3.14159'))
        >>> parse_decimal(" 0.1 ")
        Success(Decimal('0.1'))
        >>> parse_decimal("not a number").is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    trimmed = input_value.strip()
    try:
        number = Decimal(trimmed)
    except (InvalidOperation, ValueError):
        return Maybe.failure(error_message or 'Input must be a valid number')
    return Maybe.success(number)
def _check_enum_has_empty_value(enum_class: type[Enum]) -> bool:
    """Return True when some member of ``enum_class`` has ``''`` as its value."""
    for candidate in enum_class.__members__.values():
        if candidate.value == '':
            return True
    return False


def _find_enum_by_value(enum_class: type[Enum], value: str) -> Enum | None:
    """Return the first member whose value equals ``value``, else None."""
    hits = (candidate for candidate in enum_class.__members__.values() if candidate.value == value)
    return next(hits, None)


def _find_enum_by_name(enum_class: type[E], value: str) -> E | None:
    """Return the member named exactly ``value`` (case-sensitive), else None."""
    try:
        found = enum_class[value]
    except KeyError:
        found = None
    return found
def parse_enum(input_value: str, enum_class: type[E], error_message: str | None = None) -> Maybe[object]:
    """Parse a string to an enum member.

    Lookup order:
        1. exact match on a member value, then on a member name;
        2. the same two lookups on the whitespace-stripped input;
        3. case-insensitive match on a member name.

    The exact pass runs first so that enums whose values contain meaningful
    whitespace (or an empty string) keep working. Name lookups also use the
    stripped input, so ``" RED "`` resolves the same way as ``"RED"``.

    Args:
        input_value: String to parse.
        enum_class: The Enum class to parse into.
        error_message: Optional custom error message for parsing failures.

    Returns:
        Maybe[object]: Success with the enum member if a match is found,
        Failure(str) with an error message otherwise.

    Examples:
        >>> from enum import Enum
        >>> class Color(Enum):
        ...     RED = 'red'
        ...     GREEN = 'green'
        >>> parse_enum("red", Color)
        Success(<Color.RED: 'red'>)
        >>> parse_enum("RED", Color)
        Success(<Color.RED: 'red'>)
        >>> parse_enum(" green ", Color)
        Success(<Color.GREEN: 'green'>)
        >>> parse_enum("yellow", Color).is_failure()
        True
    """
    if not isinstance(enum_class, type) or not issubclass(enum_class, Enum):
        return Maybe.failure(error_message or 'Invalid enum class provided')

    # An empty input is only meaningful when the enum defines '' as a value.
    has_empty_value = _check_enum_has_empty_value(enum_class)
    if input_value == '' and not has_empty_value:
        return Maybe.failure('Input must not be empty')

    input_stripped = input_value.strip()
    candidates = [input_value] if input_stripped == input_value else [input_value, input_stripped]

    # Explicit "is not None" checks: enum members with a mixed-in type
    # (e.g. an IntEnum member equal to 0) can be falsy.
    for candidate in candidates:
        member = _find_enum_by_value(enum_class, candidate)
        if member is None:
            member = _find_enum_by_name(enum_class, candidate)
        if member is not None:
            return Maybe.success(member)

    # Case-insensitive name match on either spelling of the input.
    folded = {candidate.lower() for candidate in candidates}
    for name in enum_class.__members__:
        if name.lower() in folded:
            return Maybe.success(enum_class[name])

    return Maybe.failure(error_message or 'Input must be a valid enumeration value')
[docs] def parse_list( input_value: str, element_parser: Callable[[str], Maybe[T]] | None = None, separator: str = ',', error_message: str | None = None, ) -> Maybe[list[T]]: """Parse a string to a list using the specified element parser and separator. Splits the input string by the separator and parses each element using the element parser. If no element parser is provided, elements are returned as trimmed strings. Args: input_value: The string to parse element_parser: A function that parses individual elements (default: strips whitespace) separator: The string that separates elements (default: ',') error_message: Custom error message for parsing failures Returns: Maybe[list[T]]: Success with parsed list or Failure with error message Examples: >>> parse_list("a,b,c") Success(['a', 'b', 'c']) >>> parse_list("1, 2, 3", element_parser=parse_int) Success([1, 2, 3]) >>> parse_list("apple|banana|cherry", separator="|") Success(['apple', 'banana', 'cherry']) >>> parse_list("1,2,invalid", element_parser=parse_int).is_failure() True """ if not input_value: return Maybe.failure('Input must not be empty') def default_parser(s: str) -> Maybe[T]: return Maybe.success(s.strip()) # type: ignore[arg-type] parser = element_parser if element_parser is not None else default_parser elements = input_value.split(separator) parsed_elements: list[T] = [] for i, element in enumerate(elements, start=1): match parser(element.strip()): case Success(value) if value is not None: parsed_elements.append(value) case Failure() if error_message: return Maybe.failure(error_message) case Failure(result): return Maybe.failure(f"Failed to parse element {i} '{element}': {result}") return Maybe.success(parsed_elements)
def _parse_key_value_pair( # noqa: PLR0913 pair: str, index: int, key_parser: Callable[[str], Maybe[K]], # K can be None value_parser: Callable[[str], Maybe[V]], # V can be None key_value_separator: str, error_message: str | None = None, ) -> tuple[bool, K | None, V | None, str | None]: """Parse a single key-value pair. Returns: A tuple of (success, key, value, error_message) """ if key_value_separator not in pair: error = f"Invalid key-value pair '{pair}': missing separator '{key_value_separator}'" return False, None, None, error_message or error key_str, value_str = pair.split(key_value_separator, 1) # Parse the key key_result = key_parser(key_str.strip()) if key_result.is_failure(): error = f"Failed to parse key in pair {index + 1} '{pair}': {key_result.error_or('Parse error')}" return False, None, None, error_message or error # Parse the value value_result = value_parser(value_str.strip()) if value_result.is_failure(): error = f"Failed to parse value in pair {index + 1} '{pair}': {value_result.error_or('Parse error')}" return False, None, None, error_message or error # At this point both results are Success; extract concrete values by pattern matching match key_result: case Success(key_val): key: K | None = key_val case _: key = None match value_result: case Success(value_val): value: V | None = value_val case _: value = None return True, key, value, None
def parse_dict(  # noqa: PLR0913
    input_value: str,
    key_parser: Callable[[str], Maybe[K]] | None = None,
    value_parser: Callable[[str], Maybe[V]] | None = None,
    pair_separator: str = ',',
    key_value_separator: str = ':',
    error_message: str | None = None,
) -> Maybe[dict[K, V]]:
    """Build a dictionary from delimited ``key:value`` text.

    The input is cut into pairs on ``pair_separator``; every pair is cut
    into key and value on ``key_value_separator`` and run through the
    respective parser (stripped strings by default). The first pair that
    fails aborts the parse. Pairs whose key or value parses to ``None`` are
    left out of the result.

    Args:
        input_value: The string to parse.
        key_parser: Parser for keys (default: strip whitespace).
        value_parser: Parser for values (default: strip whitespace).
        pair_separator: Text between pairs (default: ',').
        key_value_separator: Text between key and value (default: ':').
        error_message: Custom error message for parsing failures.

    Returns:
        Maybe[dict[K, V]]: Success with the parsed dictionary or Failure
        with an error message.

    Examples:
        >>> parse_dict("a:1,b:2,c:3")
        Success({'a': '1', 'b': '2', 'c': '3'})
        >>> parse_dict("x:10, y:20", value_parser=parse_int)
        Success({'x': 10, 'y': 20})
        >>> parse_dict("a:1,b:invalid", value_parser=parse_int).is_failure()
        True
    """
    if not input_value:
        return Maybe.failure('Input must not be empty')

    def _strip_only(s: str) -> Maybe[str | None]:
        """Default parser: return the text without surrounding whitespace."""
        return Maybe.success(s.strip())

    chosen_key_parser = _strip_only if key_parser is None else key_parser
    chosen_value_parser = _strip_only if value_parser is None else value_parser
    actual_key_parser: Callable[[str], Maybe[K | None]] = cast('Callable[[str], Maybe[K | None]]', chosen_key_parser)
    actual_value_parser: Callable[[str], Maybe[V | None]] = cast(
        'Callable[[str], Maybe[V | None]]', chosen_value_parser
    )

    collected: dict[K, V] = {}
    for position, fragment in enumerate(input_value.split(pair_separator)):
        ok, key, value, problem = _parse_key_value_pair(
            fragment, position, actual_key_parser, actual_value_parser, key_value_separator, error_message
        )
        if not ok:
            return Maybe.failure(problem or 'Failed to parse key-value pair')
        if key is None or value is None:
            continue
        collected[key] = value

    return Maybe.success(collected)
def parse_set(
    input_value: str,
    element_parser: Callable[[str], Maybe[T]] | None = None,
    separator: str | None = None,
    error_message: str | None = None,
) -> Maybe[set[T]]:
    """Parse a string to a set using the specified element parser and separator.

    Delegates the splitting and element parsing to ``parse_list`` and then
    removes duplicates by converting the result to a set. When parsing
    fails, the failure message produced by ``parse_list`` (the custom
    ``error_message`` if one was given, otherwise the element-level detail)
    is passed through unchanged.

    Args:
        input_value: The string to parse.
        element_parser: A function that parses individual elements
            (default: strips whitespace).
        separator: The string that separates elements; ``None`` means ','.
        error_message: Custom error message for parsing failures.

    Returns:
        Maybe[set[T]]: Success with the parsed set or Failure with an error
        message.

    Examples:
        >>> result = parse_set("1, 2, 3, 2, 1", element_parser=parse_int)
        >>> sorted(result.value_or(set()))
        [1, 2, 3]
        >>> result = parse_set("red|blue|green|red", separator="|")
        >>> sorted(result.value_or(set()))
        ['blue', 'green', 'red']
        >>> parse_set("1,2,invalid", element_parser=parse_int).is_failure()
        True
    """
    if separator is None:
        separator = ','

    result = parse_list(input_value, element_parser, separator, error_message)
    if result.is_failure():
        # Keep the real reason; 'Parse error' is only a last-resort default.
        return Maybe.failure(result.error_or('Parse error'))

    # Converting to a set removes duplicates.
    return Maybe.success(set(result.value_or([])))
# Type-specific validation parsers
def parse_int_with_validation(
    input_value: str,
    min_value: int | None = None,
    max_value: int | None = None,
    error_message: str | None = None,
) -> Maybe[int]:
    """Parse an integer and check it against optional inclusive bounds.

    The string is parsed with ``parse_int`` first; a parse failure is
    returned as is. The lower bound is checked before the upper bound.

    Args:
        input_value: The string to parse.
        min_value: Minimum allowed value (inclusive).
        max_value: Maximum allowed value (inclusive).
        error_message: Custom error message for parsing or range failures.

    Returns:
        Maybe[int]: Success with the validated integer or Failure with an
        error message.

    Examples:
        >>> parse_int_with_validation("42", min_value=0, max_value=100)
        Success(42)
        >>> parse_int_with_validation("5", min_value=10).is_failure()
        True
        >>> parse_int_with_validation("150", max_value=100).is_failure()
        True
    """
    parsed = parse_int(input_value, error_message)
    if parsed.is_failure():
        return parsed

    number = parsed.value_or(0)

    below_minimum = min_value is not None and number < min_value
    if below_minimum:
        return Maybe.failure(error_message or f'Value must be at least {min_value}')

    above_maximum = max_value is not None and number > max_value
    if above_maximum:
        return Maybe.failure(error_message or f'Value must be at most {max_value}')

    return Maybe.success(number)
def parse_list_with_validation(  # noqa: PLR0913
    input_value: str,
    element_parser: Callable[[str], Maybe[T]] | None = None,
    separator: str = ',',
    min_length: int | None = None,
    max_length: int | None = None,
    error_message: str | None = None,
) -> Maybe[list[T]]:
    """Parse a list and check its length against optional bounds.

    The string is parsed with ``parse_list`` first; a parse failure is
    returned as is. The minimum length is checked before the maximum.

    Args:
        input_value: The string to parse.
        element_parser: A function that parses individual elements.
        separator: The string that separates elements.
        min_length: Minimum allowed list length.
        max_length: Maximum allowed list length.
        error_message: Custom error message for parsing or length failures.

    Returns:
        Maybe[list[T]]: Success with the validated list or Failure with an
        error message.

    Examples:
        >>> parse_list_with_validation("a,b,c", min_length=2, max_length=5)
        Success(['a', 'b', 'c'])
        >>> parse_list_with_validation("1,2", element_parser=parse_int, min_length=3).is_failure()
        True
        >>> parse_list_with_validation("1,2,3,4,5,6", max_length=5).is_failure()
        True
    """
    parsed = parse_list(input_value, element_parser, separator, error_message)
    if parsed.is_failure():
        return parsed

    items = parsed.value_or([])
    count = len(items)

    if min_length is not None and count < min_length:
        return Maybe.failure(error_message or f'List must have at least {min_length} elements')
    if max_length is not None and count > max_length:
        return Maybe.failure(error_message or f'List must have at most {max_length} elements')

    return Maybe.success(items)
def parse_dict_with_validation(  # noqa: PLR0913
    input_value: str,
    key_parser: Callable[[str], Maybe[K]] | None = None,
    value_parser: Callable[[str], Maybe[V]] | None = None,
    pair_separator: str = ',',
    key_value_separator: str = ':',
    required_keys: list[str] | None = None,
    error_message: str | None = None,
) -> Maybe[dict[K, V]]:
    """Parse a dictionary and make sure the required keys are present.

    The string is parsed with ``parse_dict`` first; a parse failure is
    returned as is. Afterwards every entry of ``required_keys`` must be a
    key of the parsed dictionary.

    Args:
        input_value: The string to parse.
        key_parser: A function that parses keys.
        value_parser: A function that parses values.
        pair_separator: The string that separates key-value pairs.
        key_value_separator: The string that separates keys from values.
        required_keys: List of keys that must be present.
        error_message: Custom error message for parsing or validation failures.

    Returns:
        Maybe[dict[K, V]]: Success with the validated dictionary or Failure
        with an error message.

    Examples:
        >>> parse_dict_with_validation("name:Alice,age:30", required_keys=["name", "age"])
        Success({'name': 'Alice', 'age': '30'})
        >>> parse_dict_with_validation("name:Bob", required_keys=["name", "age"]).is_failure()
        True
    """
    parsed = parse_dict(input_value, key_parser, value_parser, pair_separator, key_value_separator, error_message)
    if parsed.is_failure():
        return parsed

    mapping = parsed.value_or({})

    # A missing or empty requirement list means there is nothing to check.
    absent = [key for key in (required_keys or []) if key not in mapping]
    if absent:
        return Maybe.failure(error_message or f'Missing required keys: {", ".join(absent)}')

    return Maybe.success(mapping)
def create_parser(convert_func: Callable[[str], T], error_message: str | None = None) -> Callable[[str], Maybe[T]]:
    """Create a parser function from a conversion function.

    The returned parser rejects empty input, strips surrounding whitespace,
    calls ``convert_func`` and turns any exception it raises into a
    ``Failure`` instead of letting it propagate.

    Args:
        convert_func: A function that converts strings to values of type T.
            Any callable works, including ones without a ``__name__``
            attribute such as ``functools.partial`` objects.
        error_message: Optional custom error message for failures.

    Returns:
        A parser function that returns Maybe[T].

    Example:
        >>> from decimal import Decimal
        >>> parse_decimal = create_parser(Decimal, "Invalid decimal format")
        >>> result = parse_decimal("3.14")
        >>> result.is_success()
        True
    """
    # Resolve the display name up front: not every callable has __name__, and
    # looking it up inside the except handler would raise AttributeError
    # exactly when a failure is being reported.
    converter_name = getattr(convert_func, '__name__', None) or type(convert_func).__name__

    def parser(input_value: str) -> Maybe[T]:
        if not input_value:
            return Failure('Input must not be empty')

        try:
            return Success(convert_func(input_value.strip()))
        except Exception as e:  # noqa: BLE001
            # Deliberately broad: arbitrary converters may raise anything.
            return Failure(error_message or f'Invalid {converter_name} format: {e}')

    return parser
@overload
def make_parser(func: Callable[[str], T]) -> Callable[[str], Maybe[T]]: ...


@overload
def make_parser() -> Callable[[Callable[[str], T]], Callable[[str], Maybe[T]]]: ...


def make_parser(
    func: Callable[[str], T] | None = None,
) -> Callable[[str], Maybe[T]] | Callable[[Callable[[str], T]], Callable[[str], Maybe[T]]]:
    """Create a parser function from a conversion function with a decorator.

    The wrapped function fails with 'Input must not be empty' for falsy input,
    receives the stripped input otherwise, and has any exception it raises
    converted into a Failure.

    Args:
        func: The conversion function when used as ``@make_parser``; ``None``
            when used as ``@make_parser()``.

    Returns:
        The wrapped parser when ``func`` is given, otherwise a decorator that
        produces one.

    Example:
        @make_parser
        def parse_decimal(s: str) -> Decimal:
            return Decimal(s)

        # Or with parentheses
        @make_parser()
        def parse_decimal(s: str) -> Decimal:
            return Decimal(s)

        result = parse_decimal("123.45")  # Returns Maybe[Decimal]

    """

    def decorator(f: Callable[[str], T]) -> Callable[[str], Maybe[T]]:
        # Callables such as functools.partial objects lack __name__; fall back to the
        # type name so the error path below cannot raise AttributeError.
        func_name = getattr(f, '__name__', type(f).__name__)

        @wraps(f)
        def wrapper(input_value: str) -> Maybe[T]:
            if not input_value:
                return Maybe.failure('Input must not be empty')
            try:
                return Maybe.success(f(input_value.strip()))
            except Exception as e:  # noqa: BLE001
                return Maybe.failure(f'Invalid format for {func_name}, error: {e}')

        return wrapper

    # Handle both @make_parser and @make_parser() syntax
    if func is None:
        return decorator
    return decorator(func)
def validated_parser(
    convert_func: Callable[[str], T], validator: Callable[[T], Maybe[T]], error_message: str | None = None
) -> Callable[[str], Maybe[T]]:
    """Build a parser that converts a string and then validates the result.

    The conversion step is produced by ``create_parser``; the validator is
    chained onto its result with ``bind``.

    Args:
        convert_func: A function that converts strings to values of type T
        validator: A validator function applied to the parsed value
        error_message: Optional custom error message for parsing failures

    Returns:
        A parser function that returns Maybe[T]

    Example:
        >>> from decimal import Decimal
        >>> from valid8r.core.validators import minimum, maximum
        >>> # Create a parser for positive decimals
        >>> valid_range = lambda x: minimum(0)(x).bind(lambda y: maximum(100)(y))
        >>> parse_percent = validated_parser(Decimal, valid_range)
        >>> result = parse_percent("42.5")
        >>> result.is_success()
        True

    """
    base_parser = create_parser(convert_func, error_message)

    def parser(input_value: str) -> Maybe[T]:
        # Parse first, then hand the outcome to the validator via bind
        return base_parser(input_value).bind(validator)

    return parser
def parse_uuid(text: str, version: int | None = None, strict: bool = True) -> Maybe[UUID]:
    """Parse a string to a UUID.

    Uses uuid-utils to parse and validate UUIDs across versions 1, 3, 4, 5, 6, 7, and 8
    when available. When ``version`` is provided, validates the parsed UUID version. In
    ``strict`` mode (default), a mismatch yields a Failure; otherwise, the mismatch is
    ignored and the UUID is returned.

    Args:
        text: The input string to parse as UUID (surrounding whitespace is stripped).
        version: Optional expected UUID version to validate against.
        strict: Whether to enforce the expected version when provided.

    Returns:
        Maybe[UUID]: Success with a standard-library UUID object or Failure with an
        error message. Falsy input fails with 'Input must not be empty'; any other
        non-string input fails with 'Input must be a string'.

    """
    if not text:
        return Maybe.failure('Input must not be empty')

    # Truthy non-strings (ints, UUID instances, ...) have no .strip(); report them as a
    # Failure like the IP parsers in this module do, instead of raising AttributeError.
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    s = text.strip()

    try:
        # Prefer uuid-utils if available; fall back to stdlib
        if uuidu is not None:
            parsed_any = uuidu.UUID(s)
            parsed_version = getattr(parsed_any, 'version', None)
        else:
            parsed_std = UUID(s)
            parsed_version = getattr(parsed_std, 'version', None)
    except Exception:  # noqa: BLE001
        return Maybe.failure('Input must be a valid UUID')

    if version is not None:
        supported_versions = {1, 3, 4, 5, 6, 7, 8}
        if version not in supported_versions:
            return Maybe.failure(f'Unsupported UUID version: v{version}')
        if strict and version != parsed_version:
            return Maybe.failure(f'UUID version mismatch: expected v{version}, got v{parsed_version}')

    # Return a standard library UUID object for compatibility
    try:
        return Maybe.success(UUID(s))
    except Exception:  # noqa: BLE001
        # This should not happen if initial parsing succeeded, but guard anyway
        return Maybe.failure('Input must be a valid UUID')
def parse_ipv4(text: str) -> Maybe[IPv4Address]:
    """Parse a dotted-decimal IPv4 address.

    Surrounding whitespace is removed before parsing. Anything that is not a
    string, is blank, or does not parse to an ``IPv4Address`` is a Failure.

    Args:
        text: String containing an IPv4 address (whitespace is stripped)

    Returns:
        Maybe[IPv4Address]: Success(IPv4Address) if valid, Failure(str) with error message otherwise

    Examples:
        >>> parse_ipv4("192.168.1.1")
        Success(IPv4Address('192.168.1.1'))
        >>> parse_ipv4(" 10.0.0.1 ")
        Success(IPv4Address('10.0.0.1'))
        >>> parse_ipv4("256.1.1.1").is_failure()
        True
        >>> parse_ipv4("not an ip").is_failure()
        True

    """
    invalid = 'not a valid IPv4 address'

    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if candidate == '':
        return Maybe.failure('Input must not be empty')

    try:
        parsed = ip_address(candidate)
    except ValueError:
        return Maybe.failure(invalid)

    # ip_address() also accepts IPv6 text; only an IPv4 result counts here
    if not isinstance(parsed, IPv4Address):
        return Maybe.failure(invalid)
    return Maybe.success(parsed)
def parse_ipv6(text: str) -> Maybe[IPv6Address]:
    """Parse an IPv6 address in standard notation.

    Surrounding whitespace is removed before parsing. Input containing ``%``
    (a scope ID such as ``%eth0``) is rejected outright.

    Args:
        text: String containing an IPv6 address (whitespace is stripped)

    Returns:
        Maybe[IPv6Address]: Success(IPv6Address) if valid, Failure(str) with error message otherwise

    Examples:
        >>> parse_ipv6("::1")
        Success(IPv6Address('::1'))
        >>> parse_ipv6("2001:0db8:85a3::8a2e:0370:7334")
        Success(IPv6Address('2001:db8:85a3::8a2e:370:7334'))
        >>> parse_ipv6(" fe80::1 ")
        Success(IPv6Address('fe80::1'))
        >>> parse_ipv6("192.168.1.1").is_failure()
        True

    """
    invalid = 'not a valid IPv6 address'

    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if candidate == '':
        return Maybe.failure('Input must not be empty')

    # Scope IDs like %eth0 are refused before the address is even parsed
    if '%' in candidate:
        return Maybe.failure(invalid)

    try:
        parsed = ip_address(candidate)
    except ValueError:
        return Maybe.failure(invalid)

    # ip_address() also accepts IPv4 text; only an IPv6 result counts here
    if not isinstance(parsed, IPv6Address):
        return Maybe.failure(invalid)
    return Maybe.success(parsed)
def parse_ip(text: str) -> Maybe[IPv4Address | IPv6Address]:
    """Parse a string as an IP address of either family.

    Surrounding whitespace is removed before parsing. Input containing ``%``
    or ``://`` is rejected before parsing is attempted.

    Args:
        text: String containing an IP address (IPv4 or IPv6, whitespace is stripped)

    Returns:
        Maybe[IPv4Address | IPv6Address]: Success with IPv4Address or IPv6Address if valid,
            Failure(str) with error message otherwise

    Examples:
        >>> result = parse_ip("192.168.1.1")
        >>> result.is_success()
        True
        >>> result = parse_ip("::1")
        >>> result.is_success()
        True
        >>> parse_ip(" 10.0.0.1 ")
        Success(IPv4Address('10.0.0.1'))
        >>> parse_ip("not an ip").is_failure()
        True

    """
    invalid = 'not a valid IP address'

    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if candidate == '':
        return Maybe.failure('Input must not be empty')

    # Reject non-address forms such as IPv6 scope IDs or URLs
    if any(marker in candidate for marker in ('%', '://')):
        return Maybe.failure(invalid)

    try:
        parsed = ip_address(candidate)
    except ValueError:
        return Maybe.failure(invalid)

    if not isinstance(parsed, (IPv4Address, IPv6Address)):
        return Maybe.failure(invalid)
    return Maybe.success(parsed)
def parse_cidr(text: str, *, strict: bool = True) -> Maybe[IPv4Network | IPv6Network]:
    """Parse an IPv4 or IPv6 network written in CIDR notation.

    The stripped input is passed to ``ipaddress.ip_network`` together with
    ``strict``. In strict mode a network whose host bits are set is rejected
    with 'has host bits set'; with ``strict=False`` the host bits are masked.

    Args:
        text: String containing a CIDR network (whitespace is stripped)
        strict: If True, reject networks with host bits set; if False, mask them (default: True)

    Returns:
        Maybe[IPv4Network | IPv6Network]: Success with IPv4Network or IPv6Network if valid,
            Failure(str) with error message otherwise

    Examples:
        >>> parse_cidr("192.168.1.0/24")
        Success(IPv4Network('192.168.1.0/24'))
        >>> parse_cidr("10.0.0.0/8")
        Success(IPv4Network('10.0.0.0/8'))
        >>> parse_cidr("2001:db8::/32")
        Success(IPv6Network('2001:db8::/32'))
        >>> # Strict mode rejects host bits
        >>> parse_cidr("192.168.1.5/24").is_failure()
        True
        >>> # Non-strict mode masks host bits
        >>> result = parse_cidr("192.168.1.5/24", strict=False)
        >>> str(result.value_or(None))
        '192.168.1.0/24'

    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if candidate == '':
        return Maybe.failure('Input must not be empty')

    try:
        network = ip_network(candidate, strict=strict)
    except ValueError as err:
        # Surface the host-bits case with its own message; everything else is generic
        reason = 'has host bits set' if 'has host bits set' in str(err) else 'not a valid network'
        return Maybe.failure(reason)

    if not isinstance(network, (IPv4Network, IPv6Network)):
        return Maybe.failure('not a valid network')
    return Maybe.success(network)
# --------------------------- # URL and Email parsing # --------------------------- @dataclass(frozen=True)
[docs] class UrlParts: """Structured URL components. Attributes: scheme: Lowercased scheme (e.g. "http"). username: Username from userinfo, if present. password: Password from userinfo, if present. host: Lowercased host or IPv6 literal without brackets, or None when not provided and not required. port: Explicit port if present, otherwise None. path: Path component as-is (no normalization). query: Query string without leading '?'. fragment: Fragment without leading '#'. Examples: >>> from valid8r.core.maybe import Success >>> match parse_url('https://alice:pw@example.com:8443/x?q=1#top'): ... case Success(u): ... (u.scheme, u.username, u.password, u.host, u.port, u.path, u.query, u.fragment) ... case _: ... () ('https', 'alice', 'pw', 'example.com', 8443, '/x', 'q=1', 'top') """
[docs] scheme: str
[docs] username: str | None
[docs] password: str | None
[docs] host: str | None
[docs] port: int | None
[docs] path: str
[docs] query: str
[docs] fragment: str
@dataclass(frozen=True)
class EmailAddress:
    """Immutable record of a parsed email address.

    Attributes:
        local: Local part (preserves original case).
        domain: Domain part lowercased.

    Examples:
        >>> from valid8r.core.maybe import Success
        >>> match parse_email('First.Last+tag@Example.COM'):
        ...     case Success(addr):
        ...         (addr.local, addr.domain)
        ...     case _:
        ...         ()
        ('First.Last+tag', 'example.com')

    """

    # Field order is part of the positional constructor signature; do not reorder.
    local: str
    domain: str
@dataclass(frozen=True)
[docs] class PhoneNumber: """Structured North American phone number (NANP). Represents a parsed and validated phone number in the North American Numbering Plan (United States, Canada, and other NANP territories). Attributes: area_code: Three-digit area code (NPA). exchange: Three-digit exchange code (NXX). subscriber: Four-digit subscriber number. country_code: Country code (always '1' for NANP). region: Two-letter region code ('US', 'CA', etc.). extension: Optional extension number. Examples: >>> from valid8r.core.maybe import Success >>> match parse_phone('(415) 555-2671'): ... case Success(phone): ... (phone.area_code, phone.exchange, phone.subscriber) ... case _: ... () ('415', '555', '2671') """
[docs] area_code: str
[docs] exchange: str
[docs] subscriber: str
[docs] country_code: str
[docs] region: str
[docs] extension: str | None
@property
[docs] def e164(self) -> str: """E.164 international format (+14155552671). The E.164 format is the international standard for phone numbers. It includes the country code prefix and no formatting separators. Returns: Phone number in E.164 format, with extension if present. """ base = f'+{self.country_code}{self.area_code}{self.exchange}{self.subscriber}' if self.extension: return f'{base} x{self.extension}' return base
@property
[docs] def national(self) -> str: """National format ((415) 555-2671). The national format is the standard format for displaying phone numbers within a country, without the country code. Returns: Phone number in national format, with extension if present. """ base = f'({self.area_code}) {self.exchange}-{self.subscriber}' if self.extension: return f'{base} ext. {self.extension}' return base
@property
[docs] def international(self) -> str: """International format (+1 415-555-2671). The international format includes the country code and uses dashes as separators. Returns: Phone number in international format, with extension if present. """ base = f'+{self.country_code} {self.area_code}-{self.exchange}-{self.subscriber}' if self.extension: return f'{base} ext. {self.extension}' return base
@property
[docs] def raw_digits(self) -> str: """Raw digits with country code (14155552671). Returns all digits including the country code, with no formatting. Does not include the extension. Returns: All digits as a string without any formatting. """ return f'{self.country_code}{self.area_code}{self.exchange}{self.subscriber}'
def _is_valid_hostname_label(label: str) -> bool:
    """Return True when ``label`` is an acceptable single hostname label.

    A label must be 1-63 characters long, consist only of alphanumeric
    characters and hyphens, and must not start or end with a hyphen.
    """
    if not (1 <= len(label) <= 63):
        return False
    if label.startswith('-') or label.endswith('-'):
        return False
    return all(ch.isalnum() or ch == '-' for ch in label)


def _is_valid_hostname(host: str) -> bool:
    """Return True when ``host`` is ``localhost`` or a dotted sequence of valid labels.

    The whole name is limited to 253 characters. Empty labels (leading,
    trailing or doubled dots) and labels containing characters such as
    underscores are rejected.
    """
    # Allow localhost explicitly
    if host.lower() == 'localhost':
        return True
    if len(host) == 0 or len(host) > 253:
        return False
    # An empty label fails the 1-63 length check, so no separate test is needed.
    return all(_is_valid_hostname_label(part) for part in host.split('.'))


def _parse_userinfo_and_hostport(netloc: str) -> tuple[str | None, str | None, str]:
    """Split userinfo and hostport from a netloc string.

    Returns:
        ``(username, password, hostport)``. Username and password are None
        when absent or empty. The split happens on the last ``@`` so that
        ``@`` characters inside the userinfo stay with the userinfo.
    """
    if '@' not in netloc:
        return None, None, netloc
    userinfo, hostport = netloc.rsplit('@', 1)
    user, _, pwd = userinfo.partition(':')
    return (user or None), (pwd or None), hostport


def _checked_port(port_str: str) -> int | None:
    """Convert ``port_str`` to a port number.

    Returns:
        The port as an int, or None when ``int()`` can interpret the text but
        it is not a plain ASCII decimal number in the range 0-65535.

    Raises:
        ValueError: If the text is not an integer literal at all.
    """
    port_val = int(port_str)
    # int() is lenient: it accepts underscores ('8_0'), a sign ('+80'),
    # surrounding whitespace and non-ASCII digits. None of those are valid
    # in a URL port, so insist on plain ASCII digits.
    if not (port_str.isascii() and port_str.isdigit()):
        return None
    if not (0 <= port_val <= 65535):
        return None
    return port_val


def _parse_host_and_port(hostport: str) -> tuple[str | None, int | None]:
    """Parse host and optional port from hostport.

    Supports IPv6 literals in brackets.

    Returns:
        ``(host, port)``. Host is None when it is missing or when the
        hostport is malformed (unterminated bracket, garbage after the
        closing bracket, invalid or out-of-range port). When the text after
        the last ``:`` of an unbracketed hostport is not an integer at all,
        the whole input is returned as the host with no port so that host
        validation can reject it.
    """
    if not hostport:
        return None, None

    port: int | None = None

    if hostport.startswith('['):
        # IPv6 literal [::1] or [::1]:443
        if ']' not in hostport:
            return None, None
        end = hostport.find(']')
        host = hostport[1:end]
        rest = hostport[end + 1 :]
        if rest.startswith(':'):
            try:
                port = _checked_port(rest[1:])
            except ValueError:
                return None, None
            if port is None:
                return None, None
        elif rest != '':
            # Garbage after bracket
            return None, None
        return host, port

    # Not bracketed: split on last ':' to allow IPv6 bracket requirement
    if ':' not in hostport:
        return hostport, None

    host_candidate, port_str = hostport.rsplit(':', 1)
    if host_candidate == '':
        return None, None
    try:
        port = _checked_port(port_str)
    except ValueError:
        # Could be part of IPv6 without brackets (not supported by URL syntax)
        return hostport, None
    if port is None:
        return None, None
    return host_candidate, port


def _validate_url_host(host: str | None, original_netloc: str) -> bool:
    """Return True when ``host`` is a valid IP address or hostname.

    Args:
        host: Host extracted from the netloc, or None when there is none.
        original_netloc: The netloc the host came from; a leading ``[``
            marks the host as a bracketed IP literal.
    """
    if host is None:
        return False

    # If original contained brackets or host contains ':' treat as IPv6
    if original_netloc.startswith('[') or ':' in host:
        try:
            parsed = ip_address(host)
        except ValueError:
            return False
        return isinstance(parsed, (IPv6Address, IPv4Address))

    # Try IPv4
    try:
        parsed = ip_address(host)
    except ValueError:
        pass
    else:
        if isinstance(parsed, IPv4Address):
            return True

    # Hostname
    return _is_valid_hostname(host)
def parse_url(
    text: str,
    *,
    allowed_schemes: Iterable[str] = ('http', 'https'),
    require_host: bool = True,
) -> Maybe[UrlParts]:
    """Parse a URL with light validation.

    Rules:
    - Trim surrounding whitespace only
    - Require scheme in allowed_schemes (defaults to http/https)
    - If require_host, netloc must include a valid host (hostname, IPv4, or bracketed IPv6)
    - Lowercase scheme and host; do not modify path/query/fragment

    Failure messages (exact substrings):
    - Input must be a string
    - Input must not be empty
    - Unsupported URL scheme
    - URL requires host
    - Invalid host

    Args:
        text: The URL string to parse
        allowed_schemes: Iterable of allowed scheme names (default: ('http', 'https'))
        require_host: Whether to require a host in the URL (default: True)

    Returns:
        Maybe[UrlParts]: Success with UrlParts containing parsed components, or Failure with error message.
        A netloc that ``urllib.parse.urlsplit`` refuses to split (for example
        unmatched square brackets) is reported as ``Invalid host`` rather
        than raising.

    Examples:
        >>> from valid8r.core.parsers import parse_url
        >>> from valid8r.core.maybe import Success
        >>>
        >>> # Parse a complete URL
        >>> result = parse_url('https://user:pass@api.example.com:8080/v1/users?active=true#section')
        >>> isinstance(result, Success)
        True
        >>> url = result.value
        >>> url.scheme
        'https'
        >>> url.host
        'api.example.com'
        >>> url.port
        8080
        >>> url.path
        '/v1/users'
        >>> url.query
        'active=true'
        >>> url.fragment
        'section'
        >>>
        >>> # Access credentials
        >>> url.username
        'user'
        >>> url.password
        'pass'

    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    s = text.strip()
    if s == '':
        return Maybe.failure('Input must not be empty')

    try:
        parts = urlsplit(s)
    except ValueError:
        # urlsplit raises ValueError for a malformed netloc (e.g. unmatched
        # '[' / ']'); surface it as a Failure instead of an exception.
        return Maybe.failure('Invalid host')

    scheme_lower = parts.scheme.lower()
    if scheme_lower == '' or scheme_lower not in {sch.lower() for sch in allowed_schemes}:
        return Maybe.failure('Unsupported URL scheme')

    username: str | None = None
    password: str | None = None
    host: str | None = None
    port: int | None = None

    netloc = parts.netloc
    if netloc:
        username, password, hostport = _parse_userinfo_and_hostport(netloc)
        host, port = _parse_host_and_port(hostport)

        if host is not None:
            host = host.lower()

        # Validate host when present
        if host is not None and not _validate_url_host(host, netloc):
            return Maybe.failure('Invalid host')
    elif require_host:
        return Maybe.failure('URL requires host')

    # When require_host is True we must have a host; the netloc may have been
    # non-empty yet produced no usable host (e.g. only userinfo or a bad port).
    if require_host and (host is None or host == ''):
        return Maybe.failure('URL requires host')

    result = UrlParts(
        scheme=scheme_lower,
        username=username,
        password=password,
        host=host,
        port=port,
        path=parts.path,
        query=parts.query,
        fragment=parts.fragment,
    )
    return Maybe.success(result)
def parse_email(text: str) -> Maybe[EmailAddress]:
    """Parse a bare email address of the form ``local@domain``.

    Validation is delegated to the email-validator library (RFC 5322
    compliant) with deliverability (DNS) checks disabled. The result carries
    the ``local_part`` and ``domain`` reported by that library: the domain is
    normalized to lowercase while the local part keeps its case.

    The email-validator library must be installed; when it is missing a
    Failure saying so is returned.

    Rules:
    - Trim surrounding whitespace
    - Full RFC 5322 email validation
    - Supports internationalized domains (IDNA)
    - Domain is lowercased in the result; local part preserves case

    Failure messages:
    - Input must be a string
    - Input must not be empty
    - email-validator library is required but not installed
    - Various RFC-compliant validation error messages from email-validator

    Args:
        text: The email address string to parse

    Returns:
        Maybe[EmailAddress]: Success with EmailAddress or Failure with error message

    Examples:
        >>> from valid8r.core.parsers import parse_email
        >>> from valid8r.core.maybe import Success
        >>>
        >>> # Parse an email with case normalization
        >>> result = parse_email('User.Name+tag@Example.COM')
        >>> isinstance(result, Success)
        True
        >>> email = result.value
        >>> # Local part preserves original case
        >>> email.local
        'User.Name+tag'
        >>> # Domain is normalized to lowercase
        >>> email.domain
        'example.com'

    """
    if not isinstance(text, str):
        return Maybe.failure('Input must be a string')

    candidate = text.strip()
    if not candidate:
        return Maybe.failure('Input must not be empty')

    if not HAS_EMAIL_VALIDATOR:
        return Maybe.failure('email-validator library is required but not installed')

    try:
        # Syntax-only validation: no DNS lookups are performed.
        validated = validate_email(candidate, check_deliverability=False)
        address = EmailAddress(local=validated.local_part, domain=validated.domain)
        return Maybe.success(address)
    except EmailNotValidError as exc:
        return Maybe.failure(str(exc))
    except Exception as exc:  # noqa: BLE001
        return Maybe.failure(f'email validation error: {exc}')
def parse_phone(text: str | None, *, region: str = 'US', strict: bool = False) -> Maybe[PhoneNumber]:  # noqa: PLR0912
    """Parse a North American phone number (NANP format).

    Parses phone numbers in the North American Numbering Plan format (US, Canada, etc.).
    Supports various formatting styles and validates area codes and exchanges.

    Rules:
    - Accepts 10-digit or 11-digit (with country code 1) phone numbers
    - Input longer than 100 characters is rejected up front
    - Only ASCII digits, whitespace and the separators ``( ) - + .`` are allowed
      in the number itself; the whole number must consist of these characters
    - Validates area code (NPA): cannot start with 0 or 1, cannot be 555
    - Validates exchange (NXX): cannot start with 0 or 1, cannot be 911;
      555-01xx and 555-5xxx numbers are rejected as reserved
    - Supports extensions (at most 8 digits) introduced by ``x``, ``ext``,
      ``ext.``, ``extension``, a comma or a semicolon
    - In strict mode, requires formatting characters (not just digits)
    - ``region`` is stored on the result as given (default 'US')

    Failure messages:
    - Phone number cannot be empty
    - Invalid format: phone number is too long
    - Extension is too long (maximum 8 digits)
    - Invalid format: phone number contains invalid characters
    - Strict mode requires formatting characters (e.g., dashes, parentheses, spaces)
    - Only North American phone numbers (country code 1) are supported
    - Phone number must have 10 digits, got N
    - Invalid area code: ... (cannot start with 0 or 1 / reserved for fiction)
    - Invalid exchange: ... (cannot start with 0 or 1 / emergency number / reserved ranges)

    Args:
        text: The phone number string to parse
        region: Two-letter region code (default: 'US')
        strict: If True, requires formatting characters (default: False)

    Returns:
        Maybe[PhoneNumber]: Success with PhoneNumber or Failure with error message

    Examples:
        >>> match parse_phone('(415) 555-2671'):
        ...     case Success(phone):
        ...         phone.area_code
        ...     case _:
        ...         None
        '415'

        >>> match parse_phone('415-555-2671 x123'):
        ...     case Success(phone):
        ...         phone.extension
        ...     case _:
        ...         None
        '123'

        >>> match parse_phone('+1 604 555 1234', region='CA'):
        ...     case Success(phone):
        ...         phone.region
        ...     case _:
        ...         None
        'CA'

    """
    # Handle None or empty input
    if text is None or not isinstance(text, str):
        return Maybe.failure('Phone number cannot be empty')

    s = text.strip()
    if s == '':
        return Maybe.failure('Phone number cannot be empty')

    # Early length guard (DoS mitigation) - check BEFORE regex operations
    if len(text) > 100:
        return Maybe.failure('Invalid format: phone number is too long')

    # Extract extension if present
    extension = None
    extension_match = _PHONE_EXTENSION_PATTERN.search(s)
    if extension_match:
        # Get the captured group (either group 1 or 2)
        extension = extension_match.group(1) or extension_match.group(2)
        # Validate extension length
        if len(extension) > 8:
            return Maybe.failure('Extension is too long (maximum 8 digits)')
        # Remove extension from phone number for parsing
        s = s[: extension_match.start()]

    # Check for invalid characters before extracting digits.
    # fullmatch (not match) is required: the pattern is compiled with
    # re.MULTILINE, so with match() its '$' would succeed at the first line
    # break and anything after a newline would escape validation.
    if not _PHONE_VALID_CHARS_PATTERN.fullmatch(s):
        return Maybe.failure('Invalid format: phone number contains invalid characters')

    # Extract only digits
    digits = _PHONE_DIGIT_EXTRACTION_PATTERN.sub('', s)

    # '\d' also matches non-ASCII decimal digits (e.g. Arabic-Indic or
    # full-width); those would end up verbatim in the result, so reject them.
    if not digits.isascii() or (extension is not None and not extension.isascii()):
        return Maybe.failure('Invalid format: phone number contains invalid characters')

    # Check for strict mode - original must have formatting
    if strict and text.strip() == digits:
        return Maybe.failure('Strict mode requires formatting characters (e.g., dashes, parentheses, spaces)')

    # Validate digit count
    if len(digits) == 0:
        return Maybe.failure('Phone number cannot be empty')

    # Handle country code
    country_code = '1'
    if len(digits) == 11:
        if digits[0] != '1':
            return Maybe.failure('Only North American phone numbers (country code 1) are supported')
        digits = digits[1:]  # Strip country code
    elif len(digits) > 11:
        # Check if it starts with a non-1 digit (likely international)
        if digits[0] != '1':
            return Maybe.failure('Only North American phone numbers (country code 1) are supported')
        return Maybe.failure(f'Phone number must have 10 digits, got {len(digits)}')
    elif len(digits) != 10:
        return Maybe.failure(f'Phone number must have 10 digits, got {len(digits)}')

    # Extract components
    area_code = digits[0:3]
    exchange = digits[3:6]
    subscriber = digits[6:10]

    # Validate area code (NPA)
    if area_code[0] in ('0', '1'):
        return Maybe.failure(f'Invalid area code: {area_code} (cannot start with 0 or 1)')
    if area_code == '555':
        return Maybe.failure(f'Invalid area code: {area_code} (reserved for fiction)')

    # Validate exchange (NXX)
    if exchange[0] in ('0', '1'):
        return Maybe.failure(f'Invalid exchange: {exchange} (cannot start with 0 or 1)')
    if exchange == '911':
        return Maybe.failure(f'Invalid exchange: {exchange} (emergency number)')
    # 555 exchange with 01xx subscriber numbers (0100-0199) are reserved
    if exchange == '555' and subscriber.startswith('01'):
        return Maybe.failure(f'Invalid exchange: 555-{subscriber} (555-01xx range is reserved)')
    # 555 exchange with 5xxx subscriber numbers (5000-5999) are fictional
    if exchange == '555' and subscriber.startswith('5'):
        return Maybe.failure(f'Invalid exchange: 555-{subscriber} (555-5xxx range is reserved for fiction)')

    return Maybe.success(
        PhoneNumber(
            area_code=area_code,
            exchange=exchange,
            subscriber=subscriber,
            country_code=country_code,
            region=region,
            extension=extension,
        )
    )
def parse_slug(
    text: str,
    *,
    min_length: int | None = None,
    max_length: int | None = None,
) -> Maybe[str]:
    """Parse a URL-safe slug (lowercase letters, numbers, hyphens only).

    A valid slug contains only lowercase ASCII letters, digits, and hyphens.
    It cannot start or end with a hyphen or contain consecutive hyphens. The
    input is not stripped: any whitespace, including a trailing newline,
    makes the slug invalid.

    Args:
        text: String to validate as slug
        min_length: Minimum length (optional)
        max_length: Maximum length (optional)

    Returns:
        Maybe[str]: Success with slug or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_slug
        >>>
        >>> # Valid slugs
        >>> parse_slug('hello-world').value_or(None)
        'hello-world'
        >>> parse_slug('blog-post-123').value_or(None)
        'blog-post-123'
        >>> parse_slug('a').value_or(None)
        'a'
        >>>
        >>> # With length constraints
        >>> parse_slug('hello', min_length=5).value_or(None)
        'hello'
        >>> parse_slug('hello', max_length=10).value_or(None)
        'hello'
        >>>
        >>> # Invalid slugs
        >>> parse_slug('').is_failure()
        True
        >>> parse_slug('Hello-World').is_failure()
        True
        >>> parse_slug('hello_world').is_failure()
        True
        >>> parse_slug('-hello').is_failure()
        True
        >>> parse_slug('hello-').is_failure()
        True
        >>> parse_slug('hello--world').is_failure()
        True
        >>>
        >>> # Length constraint failures
        >>> parse_slug('hi', min_length=5).is_failure()
        True
        >>> parse_slug('very-long-slug', max_length=5).is_failure()
        True

    """
    if not text:
        return Maybe.failure('Slug cannot be empty')

    # Check length constraints
    if min_length is not None and len(text) < min_length:
        return Maybe.failure(f'Slug is too short (minimum {min_length} characters)')

    if max_length is not None and len(text) > max_length:
        return Maybe.failure(f'Slug is too long (maximum {max_length} characters)')

    # Check for leading hyphen
    if text.startswith('-'):
        return Maybe.failure('Slug cannot start with a hyphen')

    # Check for trailing hyphen
    if text.endswith('-'):
        return Maybe.failure('Slug cannot end with a hyphen')

    # Check for consecutive hyphens
    if '--' in text:
        return Maybe.failure('Slug cannot contain consecutive hyphens')

    # Check for invalid characters (not lowercase, digit, or hyphen).
    # fullmatch is used instead of match with '^...$' because '$' also matches
    # just before a trailing newline, which would let 'slug\n' through.
    if not re.fullmatch(r'[a-z0-9-]+', text):
        # Check specifically for uppercase
        if any(c.isupper() for c in text):
            return Maybe.failure('Slug must contain only lowercase letters, numbers, and hyphens')
        return Maybe.failure('Slug contains invalid characters')

    return Maybe.success(text)
def parse_json(text: str) -> Maybe[object]:
    """Parse a JSON string into a Python object.

    Supports all JSON types: objects, arrays, strings, numbers, booleans, null.
    Every error raised by the decoder is converted into a Failure, including
    documents nested too deeply for the decoder and numbers the interpreter
    refuses to convert.

    Args:
        text: JSON-formatted string

    Returns:
        Maybe[object]: Success with parsed object or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_json
        >>>
        >>> # JSON objects
        >>> parse_json('{"name": "Alice", "age": 30}').value_or(None)
        {'name': 'Alice', 'age': 30}
        >>>
        >>> # JSON arrays
        >>> parse_json('[1, 2, 3, 4, 5]').value_or(None)
        [1, 2, 3, 4, 5]
        >>>
        >>> # JSON primitives
        >>> parse_json('"hello world"').value_or(None)
        'hello world'
        >>> parse_json('42').value_or(None)
        42
        >>> parse_json('true').value_or(None)
        True
        >>> parse_json('false').value_or(None)
        False
        >>> parse_json('null').value_or(None)
        >>>
        >>> # Invalid JSON
        >>> parse_json('').is_failure()
        True
        >>> parse_json('{invalid}').is_failure()
        True
        >>> parse_json('{"name": "Alice"').is_failure()
        True

    """
    if not text:
        return Maybe.failure('JSON input cannot be empty')

    try:
        result = json.loads(text)
    except json.JSONDecodeError as e:
        return Maybe.failure(f'Invalid JSON: {e.msg}')
    except RecursionError:
        # Extremely deep nesting exhausts the decoder's recursion budget.
        return Maybe.failure('Invalid JSON: nesting is too deep')
    except (TypeError, ValueError) as e:
        # TypeError: input is not str/bytes. Plain ValueError: e.g. an integer
        # literal longer than the interpreter's int-conversion digit limit.
        return Maybe.failure(f'Invalid JSON: {e}')
    return Maybe.success(result)
def parse_base64(text: str) -> Maybe[bytes]:
    r"""Parse and decode a base64-encoded string.

    Both the standard and the URL-safe alphabet are accepted, and padding is
    optional. All whitespace, including newlines inside the data, is removed
    before decoding.

    Args:
        text: Base64-encoded string

    Returns:
        Maybe[bytes]: Success with decoded bytes or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_base64
        >>>
        >>> # Standard base64 with padding
        >>> parse_base64('SGVsbG8gV29ybGQ=').value_or(None)
        b'Hello World'
        >>>
        >>> # Standard base64 without padding
        >>> parse_base64('SGVsbG8gV29ybGQ').value_or(None)
        b'Hello World'
        >>>
        >>> # URL-safe base64 (hyphens and underscores)
        >>> parse_base64('A-A=').is_success()
        True
        >>> parse_base64('Pz8_').is_success()
        True
        >>>
        >>> # Base64 with whitespace (automatically stripped)
        >>> parse_base64('  SGVsbG8gV29ybGQ=  ').value_or(None)
        b'Hello World'
        >>>
        >>> # Invalid base64
        >>> parse_base64('').is_failure()
        True
        >>> parse_base64('Not@Valid!').is_failure()
        True
        >>> parse_base64('====').is_failure()
        True

    """
    # Drop every whitespace character, wherever it occurs.
    compact = ''.join(text.split())
    if not compact:
        return Maybe.failure('Base64 input cannot be empty')

    try:
        # Map the URL-safe alphabet onto the standard one.
        standard = compact.replace('-', '+').replace('_', '/')
        # Pad up to the next multiple of four (adds nothing when already aligned).
        standard += '=' * (-len(standard) % 4)
        return Maybe.success(base64.b64decode(standard, validate=True))
    except (ValueError, binascii.Error):
        return Maybe.failure('Base64 contains invalid characters')
def parse_jwt(text: str) -> Maybe[str]:
    """Parse and validate a JWT (JSON Web Token) structure.

    Validates that the JWT has exactly three parts (header.payload.signature)
    separated by dots, and that each part is valid base64url. Also validates
    that header and payload are valid JSON.

    Note: This function validates JWT structure only. It does NOT verify
    the cryptographic signature. Use a dedicated JWT library (e.g., PyJWT)
    for signature verification and claims validation.

    Failure messages:
    - JWT cannot be empty
    - JWT must have exactly three parts separated by dots
    - JWT <part> cannot be empty
    - JWT <part> is not valid base64
    - JWT <part> is not valid JSON (header and payload only)

    Args:
        text: JWT string to validate

    Returns:
        Maybe[str]: Success with original JWT or Failure with error

    Examples:
        >>> from valid8r.core.parsers import parse_jwt
        >>>
        >>> # Valid JWT (structure only - signature not verified)
        >>> jwt = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.sig'
        >>> parse_jwt(jwt).is_success()
        True
        >>>
        >>> # JWT with whitespace (automatically stripped)
        >>> parse_jwt('  ' + jwt + '  ').is_success()
        True
        >>>
        >>> # Invalid: empty string
        >>> parse_jwt('').is_failure()
        True
        >>>
        >>> # Invalid: wrong number of parts
        >>> parse_jwt('header.payload').is_failure()
        True
        >>> parse_jwt('a.b.c.d').is_failure()
        True
        >>>
        >>> # Invalid: non-base64url encoding
        >>> parse_jwt('not-base64!.eyJzdWIiOiIxMjM0In0.sig').is_failure()
        True
        >>>
        >>> # Invalid: non-JSON header/payload
        >>> parse_jwt('bm90anNvbg==.eyJzdWIiOiIxMjM0In0.sig').is_failure()
        True

    """
    # Strip whitespace
    text = text.strip()

    if not text:
        return Maybe.failure('JWT cannot be empty')

    parts = text.split('.')
    if len(parts) != 3:
        return Maybe.failure('JWT must have exactly three parts separated by dots')

    # Helper to convert base64url to base64 with padding
    def decode_base64url(part: str) -> bytes:
        base64_part = part.replace('-', '+').replace('_', '/')
        missing_padding = len(base64_part) % 4
        if missing_padding:
            base64_part += '=' * (4 - missing_padding)
        return base64.b64decode(base64_part, validate=True)

    # (label, segment, must the decoded bytes be JSON?)
    segments = (
        ('header', parts[0], True),
        ('payload', parts[1], True),
        ('signature', parts[2], False),
    )

    for label, segment, expects_json in segments:
        if not segment:
            return Maybe.failure(f'JWT {label} cannot be empty')

        # Decode and parse in separate try blocks: json.JSONDecodeError is a
        # ValueError subclass, so a shared handler would misreport invalid
        # JSON as invalid base64.
        try:
            decoded = decode_base64url(segment)
        except (ValueError, binascii.Error):
            return Maybe.failure(f'JWT {label} is not valid base64')

        if expects_json:
            try:
                json.loads(decoded)
            except (ValueError, RecursionError):
                # ValueError covers JSONDecodeError and undecodable bytes;
                # RecursionError covers pathologically deep nesting.
                return Maybe.failure(f'JWT {label} is not valid JSON')

    return Maybe.success(text)
def parse_path(
    text: str | None,
    *,
    expand_user: bool = False,
    resolve: bool = False,
    error_message: str | None = None,
) -> Maybe[Path]:
    """Parse a string into a pathlib.Path object.

    Converts string representations of filesystem paths to Python Path objects.
    Handles cross-platform path formats, optional home directory expansion,
    and optional resolution to absolute paths.

    Args:
        text: The path string to parse (leading/trailing whitespace is stripped)
        expand_user: If True, expand ~ to user's home directory (default: False)
        resolve: If True, resolve to absolute path following symlinks (default: False)
        error_message: Custom error message for invalid input (optional)

    Returns:
        Maybe[Path]: Success(Path) if parsing succeeds, Failure(str) with error message otherwise

    Examples:
        >>> parse_path('/home/user/file.txt')
        Success(PosixPath('/home/user/file.txt'))

        >>> parse_path('data/file.txt')
        Success(PosixPath('data/file.txt'))

        >>> parse_path('')
        Failure('Path cannot be empty')

        >>> parse_path(None)
        Failure('Path cannot be empty')

    Notes:
        - Path normalization (collapsing redundant separators) happens automatically
        - This parser does NOT validate path existence - use validators for that
        - Use expand_user=True to expand ~ to the user's home directory
        - Use resolve=True to convert relative paths to absolute paths
        - The resolve option will follow symlinks and normalize the path
        - Input length is limited to 4096 characters to prevent DoS attacks
        - A home directory that cannot be determined, or a symlink loop hit
          while resolving, yields a Failure rather than an exception

    """
    # Handle None or empty input
    if text is None or not isinstance(text, str):
        return Maybe.failure(error_message or 'Path cannot be empty')

    stripped = text.strip()
    if stripped == '':
        return Maybe.failure(error_message or 'Path cannot be empty')

    # CRITICAL: Early length guard (DoS mitigation)
    # Reject oversized inputs BEFORE expensive Path operations
    # Most filesystems have path length limits around 4096 bytes (PATH_MAX)
    if len(text) > 4096:
        return Maybe.failure(error_message or 'Invalid format: path is too long')

    try:
        # Create Path object (automatically normalizes redundant separators)
        path = Path(stripped)

        # Expand user directory if requested
        if expand_user:
            path = path.expanduser()

        # Resolve to absolute path if requested
        if resolve:
            path = path.resolve()
    except (ValueError, OSError, RuntimeError) as e:
        # RuntimeError: expanduser() could not determine the home directory,
        # or resolve() hit a symlink loop (Python < 3.13).
        return Maybe.failure(error_message or f'Invalid path: {e!s}')

    return Maybe.success(path)
# Public API exports __all__ = [ 'EmailAddress', 'PhoneNumber', 'UrlParts', 'create_parser', 'make_parser', 'parse_base64', 'parse_bool', 'parse_cidr', 'parse_complex', 'parse_date', 'parse_datetime', 'parse_decimal', 'parse_dict', 'parse_dict_with_validation', 'parse_email', 'parse_enum', 'parse_float', 'parse_int', 'parse_int_with_validation', 'parse_ip', 'parse_ipv4', 'parse_ipv6', 'parse_json', 'parse_jwt', 'parse_list', 'parse_list_with_validation', 'parse_path', 'parse_phone', 'parse_set', 'parse_slug', 'parse_str', 'parse_timedelta', 'parse_url', 'parse_uuid', 'validated_parser', ]