bugzilla-junker: add config validation

I had to update my own working config, so add schema validation when
parsing the config file.

Assisted-by: claude-4-sonnet
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
diff --git a/bugzilla-junker.py b/bugzilla-junker.py
index 496d4d5..773c447 100755
--- a/bugzilla-junker.py
+++ b/bugzilla-junker.py
@@ -17,13 +17,83 @@
 import shelve
 import datetime
 import tomllib
+import os
+import sys
 
 from urllib.parse import urlparse
 
 from typing import Optional, Set, Dict, List, Any
 
+try:
+    import jsonschema  # type: ignore
+    has_jsonschema = True
+except ImportError:
+    has_jsonschema = False
+
 VERSION = '2.0'
 
+# Configuration schema for validation
+CONFIG_SCHEMA: Dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        "main": {
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "pattern": r"^https?://[^\s/$.?#].[^\s]*$",
+                    "description": "Bugzilla instance URL"
+                },
+                "apikey": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Bugzilla API key for authentication"
+                },
+                "logfile": {
+                    "type": ["string", "null"],
+                    "description": "Path to log file (optional)"
+                },
+                "cache": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Path to cache database file"
+                }
+            },
+            "required": ["url", "apikey", "cache"],
+            "additionalProperties": False
+        },
+        "spam": {
+            "type": "object",
+            "properties": {
+                "comment_tag": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Tag to apply to spam comments"
+                },
+                "bug_group": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Group to move spam bugs to"
+                },
+                "bug_status": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Status to set for spam bugs"
+                },
+                "bug_resolution": {
+                    "type": "string",
+                    "minLength": 1,
+                    "description": "Resolution to set for spam bugs"
+                }
+            },
+            "required": ["comment_tag", "bug_group", "bug_status", "bug_resolution"],
+            "additionalProperties": False
+        }
+    },
+    "required": ["main", "spam"],
+    "additionalProperties": False
+}
+
 logger = logging.getLogger('bugjunker')
 
 # Global variables for caching and state management
@@ -42,6 +112,95 @@
 bzshelf: Optional[shelve.Shelf[Any]] = None
 
 
+def validate_config_file_permissions(config_path: str) -> None:
+    """
+    Check that the configuration file has secure permissions.
+
+    Warns if the config file is readable by others, as it contains
+    sensitive information like API keys.
+
+    Args:
+        config_path: Path to the configuration file
+    """
+    try:
+        stat_info = os.stat(config_path)
+        # Check if file is readable by group or others (permissions & 0o044)
+        if stat_info.st_mode & 0o044:
+            logger.warning('Config file %s is readable by others - consider chmod 600', config_path)
+    except OSError as e:
+        logger.warning('Could not check permissions for %s: %s', config_path, e)
+
+
+def validate_config_schema(config_data: Dict[str, Any]) -> None:
+    """
+    Validate configuration data against the defined schema.
+
+    Uses jsonschema if available for comprehensive validation,
+    otherwise falls back to basic manual validation of required fields.
+
+    Args:
+        config_data: The parsed configuration dictionary
+
+    Raises:
+        SystemExit: If configuration validation fails
+    """
+    if has_jsonschema:
+        try:
+            jsonschema.validate(config_data, CONFIG_SCHEMA)  # type: ignore
+            logger.debug('Configuration schema validation passed')
+        except jsonschema.ValidationError as e:  # type: ignore
+            logger.error('Configuration validation error: %s', e.message)
+            logger.error('Failed at path: %s', ' -> '.join(str(p) for p in e.absolute_path))
+            if e.instance:
+                logger.error('Invalid value: %s', e.instance)
+            sys.exit(1)
+        except jsonschema.SchemaError as e:  # type: ignore
+            logger.error('Internal schema error: %s', e.message)
+            sys.exit(1)
+
+
+def validate_config_paths(config_data: Dict[str, Any]) -> None:
+    """
+    Validate that configured paths are accessible and writable where needed.
+
+    Checks that cache directory is writable and logfile directory exists
+    if a logfile is configured.
+
+    Args:
+        config_data: The parsed configuration dictionary
+    """
+    main_config = config_data.get('main', {})
+
+    # Validate cache path
+    cache_path = main_config.get('cache')
+    if cache_path:
+        cache_dir = os.path.dirname(cache_path)
+        if cache_dir and not os.path.exists(cache_dir):
+            try:
+                os.makedirs(cache_dir, mode=0o700)
+                logger.debug('Created cache directory: %s', cache_dir)
+            except OSError as e:
+                logger.error('Cannot create cache directory %s: %s', cache_dir, e)
+                sys.exit(1)
+
+        # Test if we can write to the cache location
+        if cache_dir and not os.access(cache_dir, os.W_OK):
+            logger.error('Cache directory %s is not writable', cache_dir)
+            sys.exit(1)
+
+    # Validate logfile path if specified
+    logfile = main_config.get('logfile')
+    if logfile:
+        logfile_dir = os.path.dirname(logfile)
+        if logfile_dir and not os.path.exists(logfile_dir):
+            try:
+                os.makedirs(logfile_dir, mode=0o755)
+                logger.debug('Created log directory: %s', logfile_dir)
+            except OSError as e:
+                logger.error('Cannot create log directory %s: %s', logfile_dir, e)
+                sys.exit(1)
+
+
 def get_session() -> requests.Session:
     """
     Get or create a persistent HTTP session for Bugzilla API requests.
@@ -585,8 +744,8 @@
         cmdargs.config. The file is expected to be in TOML format.
 
     Raises:
-        Various exceptions may be raised if the config file is missing,
-        malformed, or contains invalid TOML syntax.
+        SystemExit: If the config file is missing, malformed, contains invalid
+                   TOML syntax, or fails validation.
     """
     global config
 
@@ -594,9 +753,31 @@
     if config is None:
         logger.info('Loading configuration file %s', cmdargs.config)
 
-        # Load and parse TOML configuration file
-        # Open in binary mode as required by tomllib
-        config = tomllib.load(open(cmdargs.config, 'rb'))
+        # Check if config file exists
+        if not os.path.exists(cmdargs.config):
+            logger.error('Configuration file not found: %s', cmdargs.config)
+            sys.exit(1)
+
+        # Check file permissions for security
+        validate_config_file_permissions(cmdargs.config)
+
+        try:
+            # Load and parse TOML configuration file
+            # Open in binary mode as required by tomllib
+            with open(cmdargs.config, 'rb') as f:
+                config = tomllib.load(f)
+        except tomllib.TOMLDecodeError as e:
+            logger.error('Error parsing TOML configuration file: %s', e)
+            sys.exit(1)
+        except OSError as e:
+            logger.error('Error reading configuration file %s: %s', cmdargs.config, e)
+            sys.exit(1)
+
+        # Validate configuration schema
+        validate_config_schema(config)
+
+        # Validate paths and create directories if needed
+        validate_config_paths(config)
 
     return config
 
@@ -708,7 +889,7 @@
 
 
 if __name__ == '__main__':
-    description = 'Junk spammy bugzilla comments and ban their authors'
+    description = ('Junk spammy bugzilla comments and ban their authors.')
     parser = argparse.ArgumentParser(description=description, prog='bz-comment-junker.py')
     parser.add_argument('-c', '--config', required=True,
                         help='Configuration file')