scripts/dtc/dt-check-style - pub/scm/linux/kernel/git/torvalds/linux.git - Git at Google

 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # Check DTS coding style on YAML binding examples and on
 # .dts/.dtsi/.dtso source files. Enforces rules from
 # Documentation/devicetree/bindings/dts-coding-style.rst.
 #
 # Two modes:
 #   --mode=relaxed (default)
 #     Only rules that produce zero warnings on the current tree.
 #     Suitable for dt_binding_check.
 #   --mode=strict
 #     All rules. Required for new submissions.
 #
 # Two input types (auto-detected by file extension):
 #   *.yaml             -- DT binding; check each example block
 #   *.dts/*.dtsi/*.dtso -- DTS source; whole file is one block
 #
 # Rules are declared in a registry (see RULES below); each rule is
 # tagged with the lowest mode that runs it. Promoting a rule from
 # 'strict' to 'relaxed' is a one-line change.

 import argparse
 import re
 import sys
 from enum import Enum, auto

 import ruamel.yaml


 # ---------------------------------------------------------------------------
 # Line classification
 # ---------------------------------------------------------------------------

 class LineType(Enum):
     """Classification assigned to each source line by classify_lines()."""
     BLANK = auto()
     COMMENT = auto()         # // ... or /* ... */ on one line
     COMMENT_START = auto()   # /* without closing */
     COMMENT_BODY = auto()    # inside a multi-line comment
     COMMENT_END = auto()     # closing */
     PREPROCESSOR = auto()    # #include / #define / #ifdef / ...
     NODE_OPEN = auto()       # something { (with optional label/name/addr)
     NODE_CLOSE = auto()      # };
     PROPERTY = auto()        # name = value; or name;
     CONTINUATION = auto()    # continuation of a multi-line property


 # C preprocessor directive keywords, optionally separated from the '#'
 # by whitespace. A leading '#' alone is not enough to identify a
 # directive because DTS property names may start with '#' as well
 # (see is_preprocessor()).
 re_cpp_directive = re.compile(
     r'^#\s*(include|define|undef|ifdef|ifndef|if|else|elif|endif|'
     r'pragma|error|warning)\b')

 # label: name@addr {  -- label and addr optional; name can be "/"
 # Per the DT spec a node name may start with a digit (e.g. 1wire@...).
 # The address part is captured loosely (any non-space, non-brace run) so
 # malformed addresses (e.g. memory@0x1000) still reach
 # check_unit_address_format() instead of silently bypassing the check.
 # Groups: 1 = label, 2 = node name, 3 = unit address.
 re_node_header = re.compile(
     r'^(?:([a-zA-Z_][a-zA-Z0-9_]*):\s*)?'
     r'([a-zA-Z0-9][a-zA-Z0-9,._+-]*|/)'
     r'(?:@([^\s{]+))?'
     r'\s*\{$')

 # Reference node header: &label {  -- group 1 is the referenced label.
 re_ref_node = re.compile(
     r'^&([a-zA-Z_][a-zA-Z0-9_]*)\s*\{$')


 def is_preprocessor(stripped):
     """Return True when the leading-stripped line is a C preprocessor
     directive rather than a DTS property whose name starts with '#'."""
     return bool(re_cpp_directive.match(stripped))


 class DtsLine:
     """One classified source line plus the fields parsed out of it."""

     __slots__ = (
         'lineno', 'raw', 'linetype', 'indent_str', 'stripped',
         'prop_name', 'continuations',
         'node_name', 'node_addr', 'label', 'ref_name', 'depth',
         'closures',
     )

     def __init__(self, lineno, raw, linetype, indent_str, stripped):
         # Supplied by the classifier: lineno is 1-based within the
         # block and indent_str is the leading whitespace kept verbatim.
         self.lineno, self.raw, self.linetype = lineno, raw, linetype
         self.indent_str, self.stripped = indent_str, stripped

         # Parsed details; they stay None until a parser fills them in.
         self.prop_name = self.node_name = self.node_addr = None
         self.label = self.ref_name = None

         # Each instance owns its own list of continuation lines.
         self.continuations = []

         # depth is filled in by classify_lines; closures is the count
         # of '}' on a NODE_CLOSE line.
         self.depth, self.closures = 0, 1


 def _split_code(text):
     """Return (code, opens_block) for a leading-stripped line: the
     code portion with // and /* */ comments removed (string literals
     kept verbatim), and whether a /* */ block comment is left open.
     The code portion is right-stripped so the endswith() checks in
     classify_lines see code only, not a trailing comment or blanks."""
     out = []
     i = 0
     n = len(text)
     while i < n:
         c = text[i]
         if c == '"':
             j = i + 1
             while j < n:
                 if text[j] == '\\':
                     j += 2
                     continue
                 if text[j] == '"':
                     j += 1
                     break
                 j += 1
             out.append(text[i:j])
             i = j
             continue
         if c == '/' and i + 1 < n and text[i + 1] == '/':
             break
         if c == '/' and i + 1 < n and text[i + 1] == '*':
             end = text.find('*/', i + 2)
             if end < 0:
                 return (''.join(out).rstrip(), True)
             i = end + 2
             continue
         out.append(c)
         i += 1
     return (''.join(out).rstrip(), False)


 # Matches code made up of nothing but closing braces, each optionally
 # followed by ';' and whitespace (e.g. "}", "};" or "}; };").
 re_only_closures = re.compile(r'(?:\}\s*;?\s*)+$')


 def classify_lines(text):
     """Return a list of DtsLine. Tracks { } depth and groups
     continuation lines onto their leading PROPERTY line.

     'text' is split on newlines and each line gets a 1-based lineno.
     The depth stored on a NODE_OPEN line is the depth before its '{';
     the depth stored on a NODE_CLOSE line is the depth after its
     closures have been subtracted. CONTINUATION lines that follow a
     PROPERTY are removed from the returned list and stored in that
     property's 'continuations'; a CONTINUATION with no owning property
     stays in the list."""
     out = []
     in_block_comment = False
     in_cpp_macro = False
     prev_complete = True
     depth = 0

     # Split preserving the indent string verbatim
     re_lead = re.compile(r'^([ \t]*)(.*)$')

     for i, raw in enumerate(text.split('\n'), start=1):
         m = re_lead.match(raw)
         indent_str = m.group(1)
         stripped = m.group(2)

         # Continuation of a multi-line C preprocessor directive: the
         # previous PREPROCESSOR line ended with a '\\' line splice, so
         # this line is part of the same macro. Treat it as
         # PREPROCESSOR until the splice chain ends (no trailing '\\'
         # or a blank line).
         if in_cpp_macro:
             dl = DtsLine(i, raw, LineType.PREPROCESSOR,
                          indent_str, stripped)
             dl.depth = depth
             out.append(dl)
             in_cpp_macro = (bool(stripped) and
                             stripped.rstrip().endswith('\\'))
             continue

         # Blank lines are recognised before the block-comment state is
         # consulted, so an empty line inside /* ... */ is BLANK too.
         if not stripped:
             dl = DtsLine(i, raw, LineType.BLANK, '', '')
             dl.depth = depth
             out.append(dl)
             continue

         # Inside a multi-line comment: the first line containing '*/'
         # ends it. Anything after the '*/' on that line is not parsed.
         if in_block_comment:
             ltype = (LineType.COMMENT_END if '*/' in stripped
                      else LineType.COMMENT_BODY)
             if ltype == LineType.COMMENT_END:
                 in_block_comment = False
             dl = DtsLine(i, raw, ltype, indent_str, stripped)
             dl.depth = depth
             out.append(dl)
             continue

         if stripped.startswith('#') and is_preprocessor(stripped):
             dl = DtsLine(i, raw, LineType.PREPROCESSOR,
                          indent_str, stripped)
             dl.depth = depth
             out.append(dl)
             prev_complete = True
             in_cpp_macro = stripped.rstrip().endswith('\\')
             continue

         # Strip comments first so all later structural checks see code
         # only. An unclosed /* sets in_block_comment for the next line.
         code, opens_block = _split_code(stripped)
         if opens_block:
             in_block_comment = True

         # Pure-comment line: nothing left after stripping. Classify as
         # COMMENT_START (carries to next line) or COMMENT, and skip the
         # structural classification entirely.
         if not code:
             ltype = LineType.COMMENT_START if opens_block else LineType.COMMENT
             dl = DtsLine(i, raw, ltype, indent_str, stripped)
             dl.depth = depth
             out.append(dl)
             continue

         # The previous code line did not end its statement, so this
         # line continues it. Note 'stripped' holds the comment-free
         # code here, and depth is left unchanged.
         if not prev_complete:
             dl = DtsLine(i, raw, LineType.CONTINUATION, indent_str, code)
             dl.depth = depth
             out.append(dl)
             prev_complete = (code.endswith(';') or
                              code.endswith('{') or
                              code.endswith('};'))
             continue

         # NODE_CLOSE: the canonical form is "}" or "};" alone. A line
         # that is nothing but closures (e.g. "}; };") is still treated
         # as NODE_CLOSE for depth tracking, but the multi-closure case
         # is flagged separately by check_node_close_alone via
         # dl.closures.
         if re_only_closures.match(code):
             closures = code.count('}')
             # Clamp at zero so unbalanced input cannot go negative.
             depth = max(depth - closures, 0)
             dl = DtsLine(i, raw, LineType.NODE_CLOSE, indent_str, code)
             dl.depth = depth
             dl.closures = closures
             out.append(dl)
             prev_complete = True
             continue

         if code.endswith('{'):
             dl = DtsLine(i, raw, LineType.NODE_OPEN, indent_str, code)
             parse_node_header(dl)
             # Record the depth of the header itself, then descend.
             dl.depth = depth
             out.append(dl)
             depth += 1
             prev_complete = True
             continue

         # Property (or first line of a multi-line property).
         dl = DtsLine(i, raw, LineType.PROPERTY, indent_str, code)
         parse_property_name(dl)
         dl.depth = depth
         out.append(dl)
         prev_complete = code.endswith(';')

     # Group continuation lines onto their leading PROPERTY.
     # Blank and comment lines do not break the association; any other
     # line type resets it.
     last_prop = None
     grouped = []
     for dl in out:
         if dl.linetype == LineType.CONTINUATION and last_prop is not None:
             last_prop.continuations.append(dl)
             continue
         if dl.linetype == LineType.PROPERTY:
             last_prop = dl
         elif dl.linetype != LineType.BLANK and \
                 dl.linetype not in (LineType.COMMENT, LineType.COMMENT_BODY,
                                     LineType.COMMENT_END,
                                     LineType.COMMENT_START):
             last_prop = None
         grouped.append(dl)
     return grouped


 def parse_node_header(dl):
     """Fill in label/node_name/node_addr for an ordinary node header,
     or ref_name for a '&label {' header. Leaves dl untouched when the
     line matches neither form."""
     header = re_node_header.match(dl.stripped)
     if header is not None:
         dl.label, dl.node_name, dl.node_addr = header.groups()
         return
     ref = re_ref_node.match(dl.stripped)
     if ref is not None:
         dl.ref_name = ref.group(1)


 def parse_property_name(dl):
     """Store the property name on dl.prop_name when dl.stripped starts
     with a name followed by '=' or ';'; otherwise leave it unchanged."""
     found = re.match(r'^([a-zA-Z0-9#][a-zA-Z0-9,._+#-]*)\s*[=;]', dl.stripped)
     if found is None:
         return
     dl.prop_name = found.group(1)


 def collect_labels_and_refs(text):
     """Return (defined_labels, referenced_labels) found anywhere outside
     comments and string literals.

     Both results are sets of label names. Labels named fake_intc*
     (injected by dt-extract-example) are left out of the defined set.

     Strings and comments are removed in a single left-to-right pass so
     that whichever construct starts first wins. Stripping them in
     separate passes is wrong in both orders: a '//' or '/*' inside a
     string (e.g. "http://x") would eat the closing quote, after which
     the string pattern pairs the orphan quote with one on a later line
     and hides every label in between."""
     def _blank(match):
         # Keep an empty literal as a placeholder for strings; drop
         # comments entirely.
         return '""' if match.group(0).startswith('"') else ''

     stripped = re.sub(
         r'"(?:[^"\\]|\\.)*"'      # string literal
         r'|/\*[\s\S]*?\*/'         # block comment, may span lines
         r'|//[^\n]*',              # line comment
         _blank, text)

     # A label precedes a node header; the next non-space token may start
     # with a letter (foo, &ref), a digit (1wire), or '/' (root node).
     defined = {
         m.group(1)
         for m in re.finditer(
             r'(?:^|[\s{])([a-zA-Z_][a-zA-Z0-9_]*):\s*[a-zA-Z0-9/&]',
             stripped)
         if not m.group(1).startswith('fake_intc')
     }
     referenced = {
         m.group(1)
         for m in re.finditer(r'&([a-zA-Z_][a-zA-Z0-9_]*)', stripped)
     }
     return defined, referenced


 # ---------------------------------------------------------------------------
 # Rule registry
 # ---------------------------------------------------------------------------

 class Ctx:
     """Everything a rule check receives: the parsed lines, the raw
     text they came from, the checking mode, and the indent kind."""

     def __init__(self, lines, text, mode, indent_kind):
         # mode is 'relaxed' or 'strict'; indent_kind is 'spaces' or 'tab'.
         self.lines, self.text = lines, text
         self.mode, self.indent_kind = mode, indent_kind


 class Rule:
     """Registry entry tying a named check function to the mode it is
     tagged with and the input types it covers."""

     __slots__ = ('name', 'mode', 'description', 'check', 'applies_to')

     def __init__(self, name, mode, description, check,
                  applies_to=('yaml', 'dts', 'dtsi', 'dtso')):
         # mode is 'relaxed' or 'strict'; applies_to lists the input
         # types this rule covers.
         self.name, self.mode = name, mode
         self.description, self.check = description, check
         self.applies_to = applies_to


 # --- individual rule check functions --------------------------------------

 def check_trailing_whitespace(ctx):
     """Yield (lineno, message) for every line whose raw text ends in
     whitespace."""
     yield from ((dl.lineno, 'trailing whitespace')
                 for dl in ctx.lines
                 if dl.raw.rstrip() != dl.raw)


 def check_tab_in_dts(ctx):
     """Flag literal tab characters when the input is space-indented.

     Only runs when ctx.indent_kind is 'spaces'; for any other indent
     kind it yields nothing. PREPROCESSOR and BLANK lines are exempt,
     so tabs inside a #define are tolerated.
     """
     if ctx.indent_kind == 'spaces':
         exempt = (LineType.PREPROCESSOR, LineType.BLANK)
         for dl in ctx.lines:
             if dl.linetype not in exempt and '\t' in dl.raw:
                 yield (dl.lineno, 'tab character not allowed in DTS example')


 def check_mixed_indent_chars(ctx):
     """Flag lines whose leading whitespace contains both a space and a
     tab. Unindented lines and PREPROCESSOR lines are not checked."""
     for dl in ctx.lines:
         indent = dl.indent_str
         if not indent or dl.linetype == LineType.PREPROCESSOR:
             continue
         if {' ', '\t'} <= set(indent):
             yield (dl.lineno, 'mixed tabs and spaces in indent')


 def detect_indent_unit(ctx):
     """Return the indent string of the first indented depth-1 line.

     BLANK, PREPROCESSOR, COMMENT_BODY and COMMENT_END lines are passed
     over, as are lines with no indent at all. The canonical results are
     '  ' (2 spaces), '    ' (4 spaces) and '\\t' (tab), but whatever
     indent is found is returned as-is so callers can report a
     non-canonical unit. Returns None when no such line exists."""
     passed_over = (LineType.BLANK, LineType.PREPROCESSOR,
                    LineType.COMMENT_BODY, LineType.COMMENT_END)
     for dl in ctx.lines:
         if (dl.depth == 1 and dl.linetype not in passed_over
                 and dl.indent_str):
             return dl.indent_str
     return None


 def check_indent_unit_relaxed(ctx):
     """Accept a detected indent unit of 2 or 4 spaces only; anything
     else is reported once, against line 1. No unit means no report."""
     unit = detect_indent_unit(ctx)
     if unit is not None and unit not in ('  ', '    '):
         yield (1, 'indent unit must be 2 or 4 spaces, got %r' % unit)


 def check_indent_unit_dts(ctx):
     """Require the detected indent unit to be exactly one tab; any
     other unit is reported once, against line 1."""
     unit = detect_indent_unit(ctx)
     if unit is not None and unit != '\t':
         yield (1, 'indent unit must be 1 tab in DTS, got %r' % unit)


 def check_indent_unit_strict(ctx):
     """For space-indented input, require the detected indent unit to
     be exactly 4 spaces. Input of any other indent kind is not checked
     by this function."""
     unit = detect_indent_unit(ctx)
     if unit is None or ctx.indent_kind != 'spaces':
         return
     if unit != '    ':
         yield (1, 'indent unit must be 4 spaces in strict mode, '
                'got %r' % unit)


 def check_indent_consistent(ctx):
     """Every indented line must be indented by exactly depth * unit.

     Nothing is reported when no unit is detected or when the unit is
     not acceptable for the indent kind; that case is left to the
     check_indent_unit_* rules."""
     unit = detect_indent_unit(ctx)
     if unit is None:
         return
     acceptable = ('  ', '    ') if ctx.indent_kind == 'spaces' else ('\t',)
     if unit not in acceptable:
         return

     # Continuations align to '<' rather than to the indent unit, and
     # comment bodies/ends are free-form.
     exempt = (LineType.BLANK, LineType.PREPROCESSOR,
               LineType.CONTINUATION,
               LineType.COMMENT_BODY, LineType.COMMENT_END)
     for dl in ctx.lines:
         if dl.linetype in exempt or not dl.indent_str:
             continue
         # NODE_CLOSE lines carry the post-decrement depth, which is
         # the indent level they are expected to sit at.
         if dl.indent_str != unit * dl.depth:
             yield (dl.lineno,
                    'indent mismatch (expected depth %d * %r)' %
                    (dl.depth, unit))


 def check_blank_lines(ctx):
     """Report runs of blank lines and blank lines that directly follow
     a node's opening line or directly precede a node's closing line.

     All 'consecutive blank lines' findings are yielded first, then the
     start/end-of-body findings."""
     lines = ctx.lines

     # Pass 1: a blank line whose predecessor is blank as well.
     for before, current in zip(lines, lines[1:]):
         if current.linetype == LineType.BLANK and \
                 before.linetype == LineType.BLANK:
             yield (current.lineno, 'consecutive blank lines')

     # Pass 2: blank right after '{' or right before '}'.
     last_index = len(lines) - 1
     for idx, dl in enumerate(lines):
         if dl.linetype != LineType.BLANK:
             continue
         if idx > 0 and lines[idx - 1].linetype == LineType.NODE_OPEN:
             yield (dl.lineno, 'blank line at start of node body')
         if idx < last_index and \
                 lines[idx + 1].linetype == LineType.NODE_CLOSE:
             yield (dl.lineno, 'blank line at end of node body')


 def _walk_bodies(lines):
     """Yield, for each node body in the input, the list of its
     immediate-child NODE_OPEN lines.

     A body is yielded when its closing brace is seen; bodies still
     open at the end of input (including the top level) are yielded
     last, innermost first. Every NODE_OPEN line is collected, ref-nodes
     (&label) included; callers filter on node_name / ref_name.

     A NODE_CLOSE line may close several nodes at once (e.g. "}; };"),
     recorded in dl.closures. One body is popped per closure so the
     stack stays in step with the depth tracking in classify_lines;
     otherwise later siblings would be attributed to the wrong parent."""
     body_stack = [[]]
     for dl in lines:
         if dl.linetype == LineType.NODE_OPEN:
             body_stack[-1].append(dl)
             body_stack.append([])
         elif dl.linetype == LineType.NODE_CLOSE:
             for _ in range(dl.closures):
                 if len(body_stack) <= 1:
                     # Unbalanced; ignore to avoid crashing on
                     # malformed input
                     break
                 yield body_stack.pop()
     while body_stack:
         yield body_stack.pop()


 def _natural_sort_key(s):
     """Split a string into a tuple of (kind, value) pairs that compares
     numeric runs as ints, so 'foo10' sorts after 'foo2'."""
     parts = []
     for part in re.split(r'(\d+)', s):
         if part.isdigit():
             parts.append((0, int(part)))
         else:
             parts.append((1, part))
     return tuple(parts)


 def check_child_address_order(ctx):
     """Within each node body, siblings that carry a unit address must
     appear in ascending address order.

     Addresses are compared as tuples of integers, one per
     comma-separated cell, each parsed as hex. Children without an
     address, or whose address does not parse, are left out of the
     comparison."""
     for children in _walk_bodies(ctx.lines):
         previous = None
         for child in children:
             if child.node_addr is None:
                 continue
             try:
                 cells = tuple(int(cell, 16)
                               for cell in child.node_addr.split(','))
             except ValueError:
                 continue
             if previous is not None and cells < previous:
                 yield (child.lineno,
                        'child node @%s out of address order' %
                        child.node_addr)
             previous = cells


 def check_child_name_order(ctx):
     """Within each node body, siblings without a unit address must
     appear in natural-sort order of their node names.

     Addressed children, the root node, and headers with no parsed node
     name (which includes '&label {' reference nodes) take no part in
     the comparison."""
     for children in _walk_bodies(ctx.lines):
         previous_key = None
         for child in children:
             if child.node_addr is not None:
                 continue
             if child.node_name in (None, '/') or \
                     child.ref_name is not None:
                 continue
             key = _natural_sort_key(child.node_name)
             if previous_key is not None and key < previous_key:
                 yield (child.lineno,
                        'child node %r out of name order' % child.node_name)
             previous_key = key


 def _property_bucket(name):
     """Return the canonical bucket index for a property:
        0 compatible
        1 reg / reg-names
        2 ranges
        3 standard properties (no vendor comma in #-stripped name)
        4 vendor-specific properties
        5 status
     Plus a sub-key inside the bucket for fixed slots (compatible, reg,
     reg-names, ranges, status). 'standard' and 'vendor' return None for
     the sub-key, signalling that the within-bucket key is computed by
     the pairing rules."""
     stripped = name.lstrip('#')
     if name == 'compatible':
         return (0, 0)
     if name == 'reg':
         return (1, 0)
     if name == 'reg-names':
         return (1, 1)
     if name == 'ranges':
         return (2, 0)
     if name == 'status':
         return (5, 0)
     return (4 if ',' in stripped else 3, None)


 # Declarative pairing rules: each is a callable
 #   (name, all_names) -> anchor_name_or_None
 # If a rule returns an anchor, the property sorts immediately after the
 # anchor. Rules are tried in order; the first match wins. If none
 # matches, the within-bucket key falls back to natural sort by the
 # #-stripped name.

 def _pair_pinctrl_names(name, all_names):
     """pinctrl-names follows the highest pinctrl-N in the same node."""
     if name != 'pinctrl-names':
         return None
     cands = [n for n in all_names if re.match(r'^pinctrl-\d+$', n)]
     if not cands:
         return None
     return max(cands, key=_natural_sort_key)


 def _pair_x_names(name, all_names):
     """Generic <x>-names follows its owning property. The owner is
     usually plural (clocks/clock-names, dmas/dma-names,
     resets/reset-names) but occasionally singular (reg/reg-names is
     handled by the fixed slot above; this rule catches anything else)."""
     if not name.endswith('-names'):
         return None
     base = name[:-len('-names')]
     # Try plural and singular forms.
     if (base + 's') in all_names:
         return base + 's'
     if base in all_names:
         return base
     return None


 # Tried in this order by _property_sort_key(); the first rule that
 # returns a non-None anchor decides where the property sorts.
 PAIRING_RULES = (_pair_pinctrl_names, _pair_x_names)


 def _property_sort_key(name, all_names):
     """Compute the sort key of a property among its node-body siblings.

     The key is (bucket, within_key, tiebreak). Fixed-slot properties
     use an empty within_key and their slot number as tiebreak. For the
     standard and vendor buckets, a property that a pairing rule
     anchors to X borrows X's natural-sort key with tiebreak 1, so it
     lands right after X; an unpaired property uses its own #-stripped
     name with tiebreak 0."""
     bucket, fixed_sub = _property_bucket(name)
     if fixed_sub is not None:
         return (bucket, (), fixed_sub)

     # Rules are evaluated lazily, in order; the first anchor wins.
     anchors = (rule(name, all_names) for rule in PAIRING_RULES)
     anchor = next((a for a in anchors if a is not None), None)

     basis, tiebreak = (name, 0) if anchor is None else (anchor, 1)
     return (bucket, _natural_sort_key(basis.lstrip('#')), tiebreak)


 def check_property_order(ctx):
     """Properties within a node body must appear in canonical order:
     compatible, reg(/reg-names), ranges, then the standard group, then
     the vendor-specific group, then status. Inside the standard and
     vendor groups, pairing rules apply (e.g. <x>-names follows <x>);
     everything else falls back to natural sort by the #-stripped name.

     Yields (lineno, message) for each property whose sort key is lower
     than that of the property immediately before it in the same body.
     Only PROPERTY lines with a parsed name, sitting directly in the
     body (not in a child node), are compared."""
     lines = ctx.lines
     for i, dl in enumerate(lines):
         if dl.linetype != LineType.NODE_OPEN:
             continue
         body_depth = dl.depth + 1
         props = []
         for j in range(i + 1, len(lines)):
             d = lines[j]
             # The body ends at the first close that drops below the
             # body depth. Compare with '<' rather than '==': a line
             # closing several nodes at once ("}; };") can skip past
             # body_depth - 1, and an equality test would then keep
             # scanning into unrelated nodes.
             if d.linetype == LineType.NODE_CLOSE and \
                     d.depth < body_depth:
                 break
             if d.linetype == LineType.PROPERTY and d.depth == body_depth \
                     and d.prop_name is not None:
                 props.append(d)
         if len(props) < 2:
             continue
         all_names = [p.prop_name for p in props]
         keyed = [(p, _property_sort_key(p.prop_name, all_names))
                  for p in props]
         for k in range(1, len(keyed)):
             if keyed[k][1] < keyed[k - 1][1]:
                 p = keyed[k][0]
                 prev = keyed[k - 1][0]
                 yield (p.lineno,
                        'property %r out of canonical order '
                        '(should sort before %r)' %
                        (p.prop_name, prev.prop_name))


 def _strip_strings_and_comments(text):
     """Remove string literals and /* */ + // comments from a single
     line, replacing them with empty strings. Used so syntactic checks
     (whitespace, hex case, etc.) don't false-positive on contents of
     quoted strings or comments. An unclosed /* on the line is treated
     as a comment running to end of line."""
     text = re.sub(r'"(?:[^"\\]|\\.)*"', '""', text)
     text = re.sub(r'/\*.*?\*/', '', text)
     text = re.sub(r'/\*.*$', '', text)
     text = re.sub(r'//.*$', '', text)
     return text


 def check_required_blank_lines(ctx):
     """A blank line must precede each child node and the 'status'
     property within a node body, except when these are the first
     substantive item in the body.

     Yields (lineno, message) tuples. Comment, preprocessor and
     continuation lines are neither substantive items nor blank lines:
     they are skipped without resetting the blank-line count."""
     lines = ctx.lines
     for i, open_dl in enumerate(lines):
         if open_dl.linetype != LineType.NODE_OPEN:
             continue
         body_depth = open_dl.depth + 1
         prev_substantive = None   # last property/child seen in this body
         between_blanks = 0        # blank lines seen since prev_substantive
         depth_inside = 0          # > 0 while scanning inside a child node
         for j in range(i + 1, len(lines)):
             d = lines[j]
             # The close of the node opened at line i ends the scan.
             if d.linetype == LineType.NODE_CLOSE and \
                     d.depth == body_depth - 1 and depth_inside == 0:
                 break
             # Track depth inside nested children so we only look at
             # immediate-body items.
             if d.linetype == LineType.NODE_OPEN and \
                     d.depth >= body_depth and depth_inside > 0:
                 depth_inside += 1
                 continue
             # NOTE(review): this steps back one level per NODE_CLOSE
             # line even when the line closes several nodes
             # (closures > 1) -- confirm that is intended.
             if d.linetype == LineType.NODE_CLOSE and depth_inside > 0:
                 depth_inside -= 1
                 continue
             if depth_inside > 0:
                 continue
             if d.linetype == LineType.BLANK:
                 # Blanks before the first substantive item don't count.
                 if prev_substantive is not None:
                     between_blanks += 1
                 continue
             if d.linetype in (LineType.COMMENT, LineType.COMMENT_START,
                               LineType.COMMENT_BODY, LineType.COMMENT_END,
                               LineType.PREPROCESSOR):
                 continue
             if d.linetype == LineType.CONTINUATION:
                 continue

             needs_blank = False
             if d.linetype == LineType.NODE_OPEN:
                 needs_blank = True
                 depth_inside = 1   # entered the child body
             elif d.linetype == LineType.PROPERTY and d.prop_name == 'status':
                 needs_blank = True

             if needs_blank and prev_substantive is not None and \
                     between_blanks == 0:
                 if d.linetype == LineType.NODE_OPEN:
                     yield (d.lineno,
                            'child node must be preceded by a blank line')
                 else:
                     yield (d.lineno,
                            '"status" must be preceded by a blank line')

             # This item becomes the reference point for the next one.
             prev_substantive = d
             between_blanks = 0


 def check_hex_case(ctx):
     """Report hex literals (0xN) that use an uppercase 'X' prefix or
     uppercase digits. Blank, comment and preprocessor lines are not
     examined, and string/comment contents on code lines are ignored."""
     not_code = (LineType.BLANK, LineType.COMMENT,
                 LineType.COMMENT_START, LineType.COMMENT_BODY,
                 LineType.COMMENT_END, LineType.PREPROCESSOR)
     for dl in ctx.lines:
         if dl.linetype in not_code:
             continue
         code = _strip_strings_and_comments(dl.raw)
         for lit in re.findall(r'\b0[xX][0-9a-fA-F]+\b', code):
             digits = lit[2:]
             if lit[1] == 'X' or digits != digits.lower():
                 yield (dl.lineno,
                        'hex literal %r must be lowercase' % lit)


 def check_unit_address_format(ctx):
     """Validate the unit address of every node header that has one.

     Each comma-separated part must be lowercase hex with no '0x'
     prefix and no leading zeros (a lone '0' is fine). Only the first
     problem found in an address is reported."""
     for dl in ctx.lines:
         if dl.linetype != LineType.NODE_OPEN or dl.node_addr is None:
             continue
         addr = dl.node_addr
         problem = None
         for part in addr.split(','):
             if part[:2] in ('0x', '0X'):
                 problem = 'unit address %r must not have a "0x" prefix'
             elif not re.match(r'^[0-9a-fA-F]+$', part):
                 problem = 'unit address %r is not valid hex'
             elif any(c in 'ABCDEF' for c in part):
                 problem = 'unit address %r must be lowercase hex'
             elif len(part) > 1 and part.startswith('0'):
                 problem = 'unit address %r has leading zeros'
             if problem is not None:
                 break
         if problem is not None:
             yield (dl.lineno, problem % addr)


 def check_value_whitespace(ctx):
     """Report a property whose <...> cell list has whitespace directly
     after '<' or directly before '>'.

     The property line and its continuation lines are joined first so
     that a cell list split across lines is covered. A '<' ending one
     line or a '>' starting the next is glued to its neighbour, so the
     line break itself never counts as padding. String and comment
     contents are ignored. At most one finding per property."""
     for dl in ctx.lines:
         if dl.linetype != LineType.PROPERTY:
             continue
         sources = [dl.raw] + [cont.stripped for cont in dl.continuations]
         joined = ''
         for source in sources:
             seg = _strip_strings_and_comments(source).strip()
             if not seg:
                 continue
             glued = (not joined or joined.endswith('<')
                      or seg.startswith('>'))
             joined += seg if glued else ' ' + seg
         cell_lists = re.findall(r'<([^<>]*)>', joined)
         if any(inner and inner != inner.strip() for inner in cell_lists):
             yield (dl.lineno, 'extra whitespace inside <...>')


 def check_node_close_alone(ctx):
     """A node's closing '};' must sit on a line of its own.

     Two situations are reported: a NODE_CLOSE line that closes more
     than one node (dl.closures > 1), and any other code line that
     still contains '};' once strings and comments are removed. Blank,
     comment and preprocessor lines are never reported."""
     never_reported = (LineType.BLANK, LineType.COMMENT,
                       LineType.COMMENT_START, LineType.COMMENT_BODY,
                       LineType.COMMENT_END, LineType.PREPROCESSOR)
     for dl in ctx.lines:
         if dl.linetype == LineType.NODE_CLOSE:
             offending = dl.closures > 1
         elif dl.linetype in never_reported:
             offending = False
         else:
             offending = '};' in _strip_strings_and_comments(dl.raw)
         if offending:
             yield (dl.lineno,
                    'closing brace must be on its own line')


 def _display_col(text):
     """Visual column width of text, with tabs expanded to the next
     8-column stop, matching how printf and most editors render a
     line and the kernel-wide line length convention."""
     col = 0
     for ch in text:
         if ch == '\t':
             col = (col // 8 + 1) * 8
         else:
             col += 1
     return col


 def check_line_length(ctx):
     """Report non-blank lines wider than 80 display columns, as
     measured by _display_col (a tab advances to the next 8-column
     stop)."""
     for dl in ctx.lines:
         if dl.linetype != LineType.BLANK:
             width = _display_col(dl.raw)
             if width > 80:
                 yield (dl.lineno,
                        'line exceeds 80 columns (%d)' % width)


 def check_continuation_alignment(ctx):
     """Check that continuation lines line up under the first value.

     For each PROPERTY line that has continuations, the reference
     column is the display column of the first '<' or '"' found after
     the first '=' on the leading line. Each continuation's indent is
     measured in display columns too, so tab-plus-space indentation
     compares correctly. Properties without '=' or without such an
     opening character are skipped.

     Yields (lineno, message) tuples for misaligned continuations; the
     reported column is 1-based.
     """
     for dl in ctx.lines:
         if dl.linetype != LineType.PROPERTY or not dl.continuations:
             continue
         name_part, equals, value_part = dl.raw.partition('=')
         if not equals:
             continue
         opener = re.search(r'[<"]', value_part)
         if opener is None:
             continue
         # Offset of the opening character within the whole raw line.
         anchor = len(name_part) + 1 + opener.start()
         target_col = _display_col(dl.raw[:anchor])
         for cont in dl.continuations:
             if _display_col(cont.indent_str) == target_col:
                 continue
             yield (cont.lineno,
                    'continuation should align to column %d '
                    '(under "<" or \\")' % (target_col + 1))


 def check_unclosed_block_comment(ctx):
     """Report a /* that is still open when the block ends.

     The scan remembers the line of the most recent unmatched opener:
     a COMMENT_START line sets it and a COMMENT_END line clears it. A
     COMMENT_BODY line seen while nothing is recorded means the comment
     was opened as a tail on a code line (which keeps its own line
     type), so that first body line is recorded instead.

     Yields at most one (lineno, message) tuple.
     """
     pending = None
     for dl in ctx.lines:
         kind = dl.linetype
         if kind == LineType.COMMENT_START:
             pending = dl.lineno
         elif kind == LineType.COMMENT_END:
             pending = None
         elif pending is None and kind == LineType.COMMENT_BODY:
             # Orphan body line: the opener sits on an earlier code
             # line, so this is the best location available.
             pending = dl.lineno
     if pending is None:
         return
     yield (pending, 'unclosed /* block comment')


 def check_unused_labels(ctx):
     """Report labels that are defined but never referenced.

     The defined and referenced label sets come from
     collect_labels_and_refs(); the difference is reported in sorted
     order. The line number is that of the first line in the text
     where the label name is followed by ':' (1 if no such line is
     found).

     Yields (lineno, message) tuples.
     """
     defined, referenced = collect_labels_and_refs(ctx.text)
     unused = defined - referenced
     for label in sorted(unused):
         pattern = r'(?m)^.*\b' + re.escape(label) + r'\s*:'
         hit = re.search(pattern, ctx.text)
         if hit:
             # Newlines before the matching line give its 0-based index.
             lineno = ctx.text.count('\n', 0, hit.start()) + 1
         else:
             lineno = 1
         yield (lineno, 'label %r defined but never &-referenced' % label)


 # --- registry --------------------------------------------------------------

 # Rule registry. select_rules() filters this list by mode and input
 # type, and --list-rules prints it in the order written here. Each
 # entry is written as Rule(name, mode, description, check function),
 # optionally with applies_to=(...) naming the input kinds (as
 # returned by input_kind()) the rule is limited to; entries without
 # applies_to use whatever default Rule defines.
 RULES = [
     # 'relaxed' is the default; rules in this group must produce zero
     # output on a clean kernel tree (post the small prep-cleanup
     # commit at the head of this series).
     Rule('trailing-whitespace', 'relaxed',
          'no trailing whitespace on any line',
          check_trailing_whitespace),
     Rule('tab-in-dts', 'relaxed',
          'YAML examples may not contain tab characters',
          check_tab_in_dts, applies_to=('yaml',)),
     Rule('mixed-indent-chars', 'relaxed',
          'indent must not mix tabs and spaces',
          check_mixed_indent_chars),
     Rule('unclosed-block-comment', 'relaxed',
          'every /* block comment must close with */',
          check_unclosed_block_comment),

     # DTS files always use tabs; this is not negotiable per kernel
     # coding style (.dts files are real source), so this rule already
     # runs in relaxed mode.
     Rule('indent-unit-dts', 'relaxed',
          'DTS files: 1 tab per nesting level',
          check_indent_unit_dts,
          applies_to=('dts', 'dtsi', 'dtso')),

     # 'strict' rules are opt-in (e.g. for new submissions via
     # checkpatch.pl in a follow-up series). They flag many existing
     # files and can be promoted to relaxed once those are cleaned up.
     #
     # NOTE: 'indent-unit' is tagged 'strict' although its check
     # function is named ..._relaxed; judging by the descriptions the
     # suffix presumably refers to the looser "2 or 4 spaces" check
     # versus the "must be 4 spaces" one, not to the rule mode.
     Rule('indent-unit', 'strict',
          'YAML: 2 or 4 spaces per level',
          check_indent_unit_relaxed, applies_to=('yaml',)),
     Rule('indent-unit-strict', 'strict',
          'YAML: must be 4 spaces per level',
          check_indent_unit_strict, applies_to=('yaml',)),
     Rule('indent-consistent', 'strict',
          'every line indented at depth * unit',
          check_indent_consistent),
     Rule('blank-lines', 'strict',
          'no consecutive blanks; no blanks at node body edges',
          check_blank_lines),
     Rule('child-address-order', 'strict',
          'addressed siblings must be in ascending address order',
          check_child_address_order),
     Rule('child-name-order', 'strict',
          'unaddressed siblings must be in natural-sort name order',
          check_child_name_order),
     Rule('property-order', 'strict',
          'canonical bucket + pairing + natural-sort order of properties',
          check_property_order),
     Rule('required-blank-lines', 'strict',
          'blank line before child nodes and before "status"',
          check_required_blank_lines),
     Rule('hex-case', 'strict',
          'hex literals must be lowercase',
          check_hex_case),
     Rule('unit-address-format', 'strict',
          'unit addresses must be lowercase hex without leading zeros',
          check_unit_address_format),
     Rule('value-whitespace', 'strict',
          'no whitespace directly inside <...> brackets',
          check_value_whitespace),
     Rule('node-close-alone', 'strict',
          'closing brace must be on its own line',
          check_node_close_alone),
     Rule('line-length', 'strict',
          'lines must not exceed 80 columns',
          check_line_length),
     Rule('continuation-alignment', 'strict',
          'multi-line property continuations align under "<" or "\\""',
          check_continuation_alignment),
     # Not applied to .dtsi/.dtso: labels defined there are meant to be
     # referenced from the files that include them.
     Rule('unused-labels', 'strict',
          'every label must be &-referenced in the same example/file '
          '(skipped for .dtsi/.dtso since labels there are exported)',
          check_unused_labels, applies_to=('yaml', 'dts')),
 ]


 def select_rules(mode, input_kind):
     """Return the RULES entries enabled for mode and input_kind.

     A rule is kept when its own mode does not rank above the
     requested one ('relaxed' ranks below 'strict', so strict mode
     also runs every relaxed rule) and input_kind is listed in its
     applies_to. Registry order is preserved.
     """
     rank = {'relaxed': 0, 'strict': 1}
     return [rule for rule in RULES
             if rank[rule.mode] <= rank[mode]
             and input_kind in rule.applies_to]


 # ---------------------------------------------------------------------------
 # Block runner
 # ---------------------------------------------------------------------------

 def check_block(text, mode, indent_kind, input_type):
     """Run every selected rule over one block of DTS text.

     The text is classified into lines once and wrapped in a Ctx that
     all rules share. Returns a list of (lineno, rule_name, message)
     tuples ordered by line number, then by rule name.
     """
     ctx = Ctx(classify_lines(text), text, mode, indent_kind)
     findings = [
         (lineno, rule.name, msg)
         for rule in select_rules(mode, input_type)
         for lineno, msg in rule.check(ctx)
     ]
     # Stable sort: findings with the same line and rule keep the
     # order in which the rule produced them.
     findings.sort(key=lambda found: found[:2])
     return findings


 # ---------------------------------------------------------------------------
 # Input drivers (YAML examples vs raw DTS)
 # ---------------------------------------------------------------------------

 def _yaml_loader():
     """Return a new ruamel.yaml YAML() instance built with default
     arguments; used to load binding files."""
     loader = ruamel.yaml.YAML()
     return loader


 def iter_yaml_examples(filepath):
     """Yield (example_text, base_lineno_in_file, example_index) tuples.

     Loads filepath as YAML and walks its top-level 'examples' entry.
     Only string items are yielded; example_index is the item's
     position in the sequence. base_lineno_in_file is the file line
     that line 1 of the example text is reported against.

     Nothing is yielded when the file cannot be read or parsed (an
     error is printed to stderr), when the document is not a mapping,
     or when 'examples' is missing or is not a sequence.
     """
     yaml = _yaml_loader()
     try:
         with open(filepath, encoding='utf-8') as f:
             data = yaml.load(f)
     except Exception as e:
         # Deliberately broad: any read or parse failure is reported
         # and the file is skipped so the remaining files still run.
         print('%s: error loading YAML: %s' % (filepath, e),
               file=sys.stderr)
         return
     if not isinstance(data, dict):
         return
     examples = data.get('examples')
     # Require a real sequence. A scalar string is iterable too, and
     # would otherwise be checked one character at a time; a mapping
     # would have its keys checked as if they were examples.
     if not isinstance(examples, list):
         return
     for i, ex in enumerate(examples):
         if not isinstance(ex, str):
             continue
         try:
             # Line recorded by the loader for item i, plus 2.
             # Presumably the recorded line is the 0-based line of the
             # "- |" marker, making this the 1-based line of the first
             # example text line -- confirm against ruamel's lc API.
             base = examples.lc.item(i)[0] + 2
         except Exception:
             # No position data available: report relative to line 1.
             base = 1
         yield (str(ex), base, i)


 def iter_dts_file(filepath):
     """Yield the whole file as one (text, 1, None) block.

     The file is read as UTF-8. If it cannot be read, an error is
     printed to stderr and nothing is yielded.
     """
     try:
         with open(filepath, encoding='utf-8') as handle:
             contents = handle.read()
     except Exception as err:
         print('%s: error reading: %s' % (filepath, err), file=sys.stderr)
     else:
         # Base line 1 and no example index: the block is the file.
         yield (contents, 1, None)


 # ---------------------------------------------------------------------------
 # Top-level processing
 # ---------------------------------------------------------------------------

 def input_kind(filepath):
     """Map a file path to its input kind by extension.

     Returns 'yaml' for .yaml/.yml, 'dts', 'dtsi' or 'dtso' for the
     matching extension, and None for anything else. The comparison
     ignores case.
     """
     lowered = filepath.lower()
     suffix_kinds = (
         (('.yaml', '.yml'), 'yaml'),
         ('.dts', 'dts'),
         ('.dtsi', 'dtsi'),
         ('.dtso', 'dtso'),
     )
     for suffixes, kind in suffix_kinds:
         if lowered.endswith(suffixes):
             return kind
     return None


 # All input types that use tab indentation and follow DTS coding style.
 # The values are kind strings as returned by input_kind();
 # collect_findings() tests membership here to pick tab indentation and
 # whole-file checking.
 DTS_FAMILY = ('dts', 'dtsi', 'dtso')


 def collect_findings(filepath, mode):
     """Check one file and return (lines, count).

     lines is a list of formatted output strings and count is the
     number of findings. YAML bindings are checked example by example
     with space indentation; DTS-family files are checked as a single
     tab-indented block. For an unrecognised extension a single
     "skipping" notice is returned with a count of 0.
     """
     kind = input_kind(filepath)
     if kind == 'yaml':
         indent_kind, blocks = 'spaces', iter_yaml_examples(filepath)
     elif kind in DTS_FAMILY:
         indent_kind, blocks = 'tab', iter_dts_file(filepath)
     else:
         return (['%s: unknown file type, skipping' % filepath], 0)

     report = []
     for text, base, idx in blocks:
         # Only YAML examples carry an index; whole files have none.
         ex_tag = '' if idx is None else ' example %d' % idx
         for lineno, rule, msg in check_block(text, mode, indent_kind, kind):
             # Block-relative line number -> line number in the file.
             file_line = base + lineno - 1
             report.append('%s:%d:%s [%s] %s' %
                           (filepath, file_line, ex_tag, rule, msg))
     return (report, len(report))


 def _worker(args):
     """Worker entry point for ProcessPoolExecutor.map().

     Defined at module top level so it is picklable on every platform.
     args is a (filepath, mode) pair; the result is whatever
     collect_findings() returns for it.
     """
     path, ruleset = args
     return collect_findings(path, ruleset)


 def main():
     """Command-line entry point.

     Parses the arguments, handles --list-rules, then runs
     collect_findings() on every input file, either in this process or
     in a ProcessPoolExecutor pool. All output lines are printed to
     stderr in input-file order.

     Returns 0 when no findings were counted (or after --list-rules)
     and 1 otherwise. argparse exits on its own for usage errors,
     including an empty file list.
     """
     import os
     ap = argparse.ArgumentParser(
         description='Check DTS coding style on YAML examples and '
         '.dts/.dtsi/.dtso files.',
         fromfile_prefix_chars='@')
     ap.add_argument('--mode', choices=('relaxed', 'strict'),
                     default='relaxed',
                     help='which rule set to apply (default: relaxed)')
     ap.add_argument('-j', '--jobs', type=int, default=0,
                     metavar='N',
                     help='run N workers in parallel (default: respect '
                     'the make jobserver via $PARALLELISM, otherwise '
                     'os.cpu_count(); use 1 to disable multiprocessing)')
     ap.add_argument('--list-rules', action='store_true',
                     help='print all rules with their mode and exit')
     ap.add_argument('files', nargs='*', metavar='file',
                     help='YAML binding files or .dts/.dtsi/.dtso files; '
                     'use @argfile to read paths from a file')
     args = ap.parse_args()

     if args.list_rules:
         for r in RULES:
             applies = ','.join(r.applies_to)
             print('%-22s %-7s [%s] %s' %
                   (r.name, r.mode, applies, r.description))
         return 0

     if not args.files:
         ap.error('no input files')

     if args.jobs > 0:
         jobs = args.jobs
     else:
         # When invoked under scripts/jobserver-exec, $PARALLELISM
         # holds the slot count make has reserved for us; this lets
         # `make -j N dt_binding_check` constrain our worker pool to N.
         try:
             jobs = int(os.environ['PARALLELISM'])
         except (KeyError, ValueError):
             jobs = 0
         # A zero or negative count is as unusable as a missing or
         # non-numeric one: ProcessPoolExecutor rejects max_workers <= 0
         # and the chunk size below would divide by zero. Fall back to
         # the CPU count in all of these cases.
         if jobs < 1:
             jobs = os.cpu_count() or 1
     # Single-process path: keep import surface small for tests and
     # easy debugging.
     if jobs == 1 or len(args.files) == 1:
         total = 0
         for f in args.files:
             lines, n = collect_findings(f, args.mode)
             for line in lines:
                 print(line, file=sys.stderr)
             total += n
         return 1 if total else 0

     # Multi-process path. ex.map preserves input order so output is
     # deterministic across runs.
     from concurrent.futures import ProcessPoolExecutor
     total = 0
     work = [(f, args.mode) for f in args.files]
     # Aim for roughly 8 chunks per worker, never less than one file
     # per chunk. work is non-empty and jobs >= 1 at this point.
     chunk = max(1, len(work) // (jobs * 8))
     with ProcessPoolExecutor(max_workers=jobs) as ex:
         for lines, n in ex.map(_worker, work, chunksize=chunk):
             for line in lines:
                 print(line, file=sys.stderr)
             total += n
     return 1 if total else 0


 if __name__ == '__main__':
     sys.exit(main())