| #!/usr/bin/env python3 |
| # SPDX-License-Identifier: GPL-2.0-only |
| # |
| # Check DTS coding style on YAML binding examples and on |
| # .dts/.dtsi/.dtso source files. Enforces rules from |
| # Documentation/devicetree/bindings/dts-coding-style.rst. |
| # |
| # Two modes: |
| # --mode=relaxed (default) |
| # Only rules that produce zero warnings on the current tree. |
| # Suitable for dt_binding_check. |
| # --mode=strict |
| # All rules. Required for new submissions. |
| # |
| # Two input types (auto-detected by file extension): |
| # *.yaml -- DT binding; check each example block |
| # *.dts/*.dtsi/*.dtso -- DTS source; whole file is one block |
| # |
| # Rules are declared in a registry (see RULES below); each rule is |
| # tagged with the lowest mode that runs it. Promoting a rule from |
| # 'strict' to 'relaxed' is a one-line change. |
| |
| import argparse |
| import re |
| import sys |
| from enum import Enum, auto |
| |
| import ruamel.yaml |
| |
| |
| # --------------------------------------------------------------------------- |
| # Line classification |
| # --------------------------------------------------------------------------- |
| |
class LineType(Enum):
    """Kind assigned to each physical line of a DTS block.

    Members are numbered sequentially from 1 in declaration order,
    which is exactly what auto() would assign; only member identity
    is compared by the rest of the checker.
    """

    BLANK = 1
    COMMENT = 2          # // ... or /* ... */ contained on one line
    COMMENT_START = 3    # /* opened, no closing */ on this line
    COMMENT_BODY = 4     # line inside a multi-line comment
    COMMENT_END = 5      # line carrying the closing */
    PREPROCESSOR = 6     # #include / #define / #ifdef / ...
    NODE_OPEN = 7        # something {  (optional label/name/addr)
    NODE_CLOSE = 8       # };
    PROPERTY = 9         # name = value;  or  name;
    CONTINUATION = 10    # follow-up line of a multi-line property
| |
| |
# Start of a C preprocessor directive: '#', optional whitespace, then
# one of the listed keywords as a whole word. Anything else starting
# with '#' does not match.
re_cpp_directive = re.compile(
    r'^#\s*(include|define|undef|ifdef|ifndef|if|else|elif|endif|'
    r'pragma|error|warning)\b')

# label: name@addr {  -- label and addr optional; name can be "/"
# Per the DT spec a node name may start with a digit (e.g. 1wire@...).
# The address part is captured loosely (any non-space, non-brace run) so
# malformed addresses (e.g. memory@0x1000) still reach
# check_unit_address_format() instead of silently bypassing the check.
# Groups: 1 = label, 2 = node name, 3 = unit address.
re_node_header = re.compile(
    r'^(?:([a-zA-Z_][a-zA-Z0-9_]*):\s*)?'
    r'([a-zA-Z0-9][a-zA-Z0-9,._+-]*|/)'
    r'(?:@([^\s{]+))?'
    r'\s*\{$')

# &label {  -- a node opened through a label reference; group 1 is the
# referenced label.
re_ref_node = re.compile(
    r'^&([a-zA-Z_][a-zA-Z0-9_]*)\s*\{$')
| |
| |
def is_preprocessor(stripped):
    """Return True when the leading-stripped line starts a C
    preprocessor directive, as opposed to a DTS property whose name
    merely begins with '#'."""
    return bool(re_cpp_directive.match(stripped))
| |
| |
class DtsLine:
    """One classified line of a DTS block.

    The constructor records what the classifier already knows; the
    remaining slots start at neutral defaults and are filled in later
    by classify_lines(), parse_node_header() and
    parse_property_name().
    """

    __slots__ = (
        'lineno', 'raw', 'linetype', 'indent_str', 'stripped',
        'prop_name', 'continuations',
        'node_name', 'node_addr', 'label', 'ref_name', 'depth',
        'closures',
    )

    def __init__(self, lineno, raw, linetype, indent_str, stripped):
        # Supplied by the caller.
        self.lineno, self.raw = lineno, raw    # lineno: 1-based in block
        self.linetype = linetype
        self.indent_str = indent_str           # leading whitespace as-is
        self.stripped = stripped

        # Property details.
        self.prop_name = None
        self.continuations = []                # fresh list per instance

        # Node header details.
        self.node_name = self.node_addr = None
        self.label = self.ref_name = None

        # Structural bookkeeping.
        self.depth = 0                         # set by classify_lines
        self.closures = 1                      # '}' count on NODE_CLOSE
| |
| |
| def _split_code(text): |
| """Return (code, opens_block) for a leading-stripped line: the |
| code portion with // and /* */ comments removed (string literals |
| kept verbatim), and whether a /* */ block comment is left open. |
| The code portion is right-stripped so the endswith() checks in |
| classify_lines see code only, not a trailing comment or blanks.""" |
| out = [] |
| i = 0 |
| n = len(text) |
| while i < n: |
| c = text[i] |
| if c == '"': |
| j = i + 1 |
| while j < n: |
| if text[j] == '\\': |
| j += 2 |
| continue |
| if text[j] == '"': |
| j += 1 |
| break |
| j += 1 |
| out.append(text[i:j]) |
| i = j |
| continue |
| if c == '/' and i + 1 < n and text[i + 1] == '/': |
| break |
| if c == '/' and i + 1 < n and text[i + 1] == '*': |
| end = text.find('*/', i + 2) |
| if end < 0: |
| return (''.join(out).rstrip(), True) |
| i = end + 2 |
| continue |
| out.append(c) |
| i += 1 |
| return (''.join(out).rstrip(), False) |
| |
| |
# A line consisting only of closing braces, each optionally followed
# by whitespace and ';' (e.g. "}", "};" or "}; };"). Used with
# .match(), so the pattern is anchored at both ends.
re_only_closures = re.compile(r'(?:\}\s*;?\s*)+$')
| |
| |
def classify_lines(text):
    """Classify every line of a DTS block and return a list of DtsLine.

    The text is split on '\\n' and lines are numbered from 1. Each
    line is tested in this order: continuation of a spliced
    preprocessor directive, blank, inside a block comment,
    preprocessor directive, pure comment, continuation of an
    unfinished property, closing brace(s), node opening, property.

    Depth bookkeeping: a NODE_OPEN line carries the depth before its
    '{' is counted; a NODE_CLOSE line carries the depth after its
    '}' (never below 0). Every other line carries the current depth.

    For CONTINUATION, NODE_CLOSE, NODE_OPEN and PROPERTY lines the
    'stripped' field holds the comment-free code returned by
    _split_code(); for the other types it holds the leading-stripped
    text (empty for BLANK).

    In the returned list, CONTINUATION lines that follow a PROPERTY
    are moved into that property's 'continuations' list and removed
    from the top level; a CONTINUATION with no owning property stays
    in the list.
    """
    out = []
    in_block_comment = False
    in_cpp_macro = False
    prev_complete = True
    depth = 0

    # Split preserving the indent string verbatim
    re_lead = re.compile(r'^([ \t]*)(.*)$')

    for i, raw in enumerate(text.split('\n'), start=1):
        m = re_lead.match(raw)
        indent_str = m.group(1)
        stripped = m.group(2)

        # Continuation of a multi-line C preprocessor directive: the
        # previous PREPROCESSOR line ended with a '\\' line splice, so
        # this line is part of the same macro. Treat it as
        # PREPROCESSOR until the splice chain ends (no trailing '\\'
        # or a blank line).
        if in_cpp_macro:
            dl = DtsLine(i, raw, LineType.PREPROCESSOR,
                         indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            in_cpp_macro = (bool(stripped) and
                            stripped.rstrip().endswith('\\'))
            continue

        # Blank lines are recorded with empty indent and content, so
        # whitespace-only lines have no indent to check (the raw text
        # is still kept in dl.raw).
        if not stripped:
            dl = DtsLine(i, raw, LineType.BLANK, '', '')
            dl.depth = depth
            out.append(dl)
            continue

        # Inside a /* ... */ opened on an earlier line: the first line
        # containing '*/' ends the comment. Anything after the '*/' on
        # that line is not examined.
        if in_block_comment:
            ltype = (LineType.COMMENT_END if '*/' in stripped
                     else LineType.COMMENT_BODY)
            if ltype == LineType.COMMENT_END:
                in_block_comment = False
            dl = DtsLine(i, raw, ltype, indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            continue

        # A preprocessor directive also terminates any pending
        # multi-line property (prev_complete is reset to True).
        if stripped.startswith('#') and is_preprocessor(stripped):
            dl = DtsLine(i, raw, LineType.PREPROCESSOR,
                         indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            prev_complete = True
            in_cpp_macro = stripped.rstrip().endswith('\\')
            continue

        # Strip comments first so all later structural checks see code
        # only. An unclosed /* sets in_block_comment for the next line.
        code, opens_block = _split_code(stripped)
        if opens_block:
            in_block_comment = True

        # Pure-comment line: nothing left after stripping. Classify as
        # COMMENT_START (carries to next line) or COMMENT, and skip the
        # structural classification entirely.
        if not code:
            ltype = LineType.COMMENT_START if opens_block else LineType.COMMENT
            dl = DtsLine(i, raw, ltype, indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            continue

        # The previous code line did not finish its statement, so this
        # line continues it. The statement counts as finished once a
        # line ends in ';' or '{'.
        # NOTE(review): a continuation ending in '{' marks the
        # statement complete but leaves 'depth' unchanged here --
        # confirm that is intended for headers split across lines.
        if not prev_complete:
            dl = DtsLine(i, raw, LineType.CONTINUATION, indent_str, code)
            dl.depth = depth
            out.append(dl)
            prev_complete = (code.endswith(';') or
                             code.endswith('{') or
                             code.endswith('};'))
            continue

        # NODE_CLOSE: the canonical form is "}" or "};" alone. A line
        # that is nothing but closures (e.g. "}; };") is still treated
        # as NODE_CLOSE for depth tracking, but the multi-closure case
        # is flagged separately by check_node_close_alone via
        # dl.closures.
        if re_only_closures.match(code):
            closures = code.count('}')
            # Clamp at 0 so unbalanced input cannot go negative.
            depth = max(depth - closures, 0)
            dl = DtsLine(i, raw, LineType.NODE_CLOSE, indent_str, code)
            dl.depth = depth
            dl.closures = closures
            out.append(dl)
            prev_complete = True
            continue

        # NODE_OPEN: any code line ending in '{'. The header fields
        # stay None when neither header pattern matches.
        if code.endswith('{'):
            dl = DtsLine(i, raw, LineType.NODE_OPEN, indent_str, code)
            parse_node_header(dl)
            dl.depth = depth
            out.append(dl)
            depth += 1
            prev_complete = True
            continue

        # Property (or first line of a multi-line property).
        dl = DtsLine(i, raw, LineType.PROPERTY, indent_str, code)
        parse_property_name(dl)
        dl.depth = depth
        out.append(dl)
        prev_complete = code.endswith(';')

    # Group continuation lines onto their leading PROPERTY.
    # Blank and comment lines between a property and its continuation
    # do not break the association; any other line type does.
    last_prop = None
    grouped = []
    for dl in out:
        if dl.linetype == LineType.CONTINUATION and last_prop is not None:
            last_prop.continuations.append(dl)
            continue
        if dl.linetype == LineType.PROPERTY:
            last_prop = dl
        elif dl.linetype != LineType.BLANK and \
                dl.linetype not in (LineType.COMMENT, LineType.COMMENT_BODY,
                                    LineType.COMMENT_END,
                                    LineType.COMMENT_START):
            last_prop = None
        grouped.append(dl)
    return grouped
| |
| |
def parse_node_header(dl):
    """Fill in the header fields of a NODE_OPEN line from dl.stripped.

    A regular header ("label: name@addr {") sets label, node_name and
    node_addr (absent parts become None). Otherwise a reference header
    ("&label {") sets ref_name. When neither form matches, dl is left
    untouched.
    """
    header = re_node_header.match(dl.stripped)
    if header is not None:
        dl.label, dl.node_name, dl.node_addr = header.groups()
        return

    reference = re_ref_node.match(dl.stripped)
    if reference is not None:
        dl.ref_name = reference.group(1)
| |
| |
def parse_property_name(dl):
    """Set dl.prop_name from the start of dl.stripped.

    The name must be followed (after optional whitespace) by '=' or
    ';'. When the line does not start with such a name, dl.prop_name
    is left as it was.
    """
    found = re.match(r'^([a-zA-Z0-9#][a-zA-Z0-9,._+#-]*)\s*[=;]',
                     dl.stripped)
    if found is None:
        return
    dl.prop_name = found.group(1)
| |
| |
def collect_labels_and_refs(text):
    """Return (defined_labels, referenced_labels) as two sets of names.

    Only text outside /* */ comments, // comments and string literals
    is considered. Labels named fake_intc* (injected by
    dt-extract-example) are left out of the defined set.

    Strings and comments are removed in a single left-to-right pass,
    so whichever construct starts first wins. Stripping them in
    separate passes mis-handles a '//' or '/*' inside a string (the
    string is cut in half and the remaining quote then pairs with a
    later one, hiding real labels in between).
    """
    def _blank(match):
        # Keep an empty literal in place of a string so the statement
        # shape survives; comments vanish entirely.
        return '""' if match.group(0).startswith('"') else ''

    stripped = re.sub(
        r'"(?:[^"\\]|\\.)*"'        # string literal
        r'|/\*[\s\S]*?\*/'          # block comment, may span lines
        r'|//[^\n]*',               # line comment
        _blank, text)

    defined = set()
    referenced = set()
    # A label precedes a node header; the next non-space token may start
    # with a letter (foo, &ref), a digit (1wire), or '/' (root node).
    for m in re.finditer(
            r'(?:^|[\s{])([a-zA-Z_][a-zA-Z0-9_]*):\s*[a-zA-Z0-9/&]',
            stripped):
        name = m.group(1)
        if not name.startswith('fake_intc'):
            defined.add(name)
    for m in re.finditer(r'&([a-zA-Z_][a-zA-Z0-9_]*)', stripped):
        referenced.add(m.group(1))
    return defined, referenced
| |
| |
| # --------------------------------------------------------------------------- |
| # Rule registry |
| # --------------------------------------------------------------------------- |
| |
class Ctx:
    """Bundle of inputs handed to every rule check function.

    Attributes:
        lines:       parsed line objects for the block
        text:        the block's raw text
        mode:        'relaxed' or 'strict'
        indent_kind: 'spaces' or 'tab'
    """

    def __init__(self, lines, text, mode, indent_kind):
        self.lines, self.text = lines, text
        self.mode, self.indent_kind = mode, indent_kind
| |
| |
class Rule:
    """One entry of the rule registry.

    Attributes:
        name:        short identifier of the rule
        mode:        'relaxed' or 'strict'
        description: one-line human-readable summary
        check:       the check function for this rule
        applies_to:  input types this rule covers; defaults to all of
                     'yaml', 'dts', 'dtsi' and 'dtso'
    """

    __slots__ = ('name', 'mode', 'description', 'check', 'applies_to')

    def __init__(self, name, mode, description, check,
                 applies_to=('yaml', 'dts', 'dtsi', 'dtso')):
        self.name, self.mode = name, mode
        self.description = description
        self.check = check
        self.applies_to = applies_to
| |
| |
| # --- individual rule check functions -------------------------------------- |
| |
def check_trailing_whitespace(ctx):
    """Yield (lineno, message) for every line whose raw text ends in
    whitespace."""
    for entry in ctx.lines:
        line = entry.raw
        if len(line.rstrip()) != len(line):
            yield (entry.lineno, 'trailing whitespace')
| |
| |
def check_tab_in_dts(ctx):
    """Flag literal tab characters in space-indented input.

    Nothing is reported unless ctx.indent_kind is 'spaces'. Blank and
    preprocessor lines are exempt (tabs inside a #define are CPP, not
    DTS); any other line containing a tab anywhere is reported.
    """
    if ctx.indent_kind != 'spaces':
        return
    exempt = (LineType.PREPROCESSOR, LineType.BLANK)
    for entry in ctx.lines:
        if entry.linetype not in exempt and '\t' in entry.raw:
            yield (entry.lineno,
                   'tab character not allowed in DTS example')
| |
| |
def check_mixed_indent_chars(ctx):
    """Flag lines whose leading whitespace contains both a space and
    a tab. Unindented and preprocessor lines are not examined."""
    for entry in ctx.lines:
        lead = entry.indent_str
        if not lead or entry.linetype == LineType.PREPROCESSOR:
            continue
        if ' ' in lead and '\t' in lead:
            yield (entry.lineno, 'mixed tabs and spaces in indent')
| |
| |
def detect_indent_unit(ctx):
    """Return the indent string used at depth 1 in this block.

    The result is the leading whitespace of the first depth-1 line
    that is indented and is not blank, a preprocessor line, or the
    body/end of a block comment. Canonical results are two spaces,
    four spaces or one tab; any other indent string is returned as
    found so the caller can report it. Returns None when no such
    line exists.
    """
    ignored = (LineType.BLANK, LineType.PREPROCESSOR,
               LineType.COMMENT_BODY, LineType.COMMENT_END)
    for entry in ctx.lines:
        if entry.depth != 1 or entry.linetype in ignored:
            continue
        if entry.indent_str:
            return entry.indent_str
    return None
| |
| |
def check_indent_unit_relaxed(ctx):
    """YAML examples: the depth-1 indent must be 2 or 4 spaces.

    Tabs and any other width are reported once, against line 1.
    Nothing is reported when no indent unit can be detected.
    """
    unit = detect_indent_unit(ctx)
    if unit is None:
        return
    # The accepted units are exactly two and exactly four spaces, as
    # the message below states.
    if unit not in ('  ', '    '):
        yield (1, 'indent unit must be 2 or 4 spaces, got %r' % unit)
| |
| |
def check_indent_unit_dts(ctx):
    """DTS files: the depth-1 indent must be exactly one tab.

    A different unit is reported once, against line 1; an
    undetectable unit (None) is not reported.
    """
    unit = detect_indent_unit(ctx)
    if unit is not None and unit != '\t':
        yield (1, 'indent unit must be 1 tab in DTS, got %r' % unit)
| |
| |
def check_indent_unit_strict(ctx):
    """Space-indented input: the depth-1 indent must be exactly 4
    spaces.

    A different unit is reported once, against line 1. Input whose
    ctx.indent_kind is not 'spaces' is not checked by this function,
    and nothing is reported when no indent unit can be detected.
    """
    unit = detect_indent_unit(ctx)
    if unit is None:
        return
    if ctx.indent_kind == 'spaces':
        # Exactly four spaces, as the message below states.
        if unit != '    ':
            yield (1, 'indent unit must be 4 spaces in strict mode, '
                      'got %r' % unit)
| |
| |
def check_indent_consistent(ctx):
    """Every indented line must be indented by exactly depth * unit.

    The unit comes from detect_indent_unit(). The check is skipped
    entirely when the unit is undetectable or non-canonical for the
    input kind (2 or 4 spaces for 'spaces', one tab otherwise), since
    the check_indent_unit_* rules report that case. Blank,
    preprocessor, continuation, comment-body, comment-end and
    unindented lines are not examined.
    """
    unit = detect_indent_unit(ctx)
    if unit is None:
        return
    if ctx.indent_kind == 'spaces':
        if unit not in ('  ', '    '):
            return  # let check_indent_unit_* report this
    else:
        if unit != '\t':
            return

    for dl in ctx.lines:
        if dl.linetype in (LineType.BLANK, LineType.PREPROCESSOR):
            continue
        if dl.linetype == LineType.CONTINUATION:
            continue  # continuations align to <, not to indent unit
        if dl.linetype in (LineType.COMMENT_BODY, LineType.COMMENT_END):
            continue
        if not dl.indent_str:
            continue
        # The indent must be 'unit' repeated dl.depth times, exactly.
        # NODE_CLOSE lines have depth equal to the post-decrement value,
        # which matches the indent expected.
        expected = unit * dl.depth
        if dl.indent_str != expected:
            yield (dl.lineno,
                   'indent mismatch (expected depth %d * %r)' %
                   (dl.depth, unit))
| |
| |
def check_blank_lines(ctx):
    """Flag superfluous blank lines.

    First pass: every blank line directly following another blank
    line. Second pass: every blank line directly after a node opening
    or directly before a node close (a line can be reported for
    both).
    """
    lines = ctx.lines

    # Pass 1: runs of blanks.
    for before, current in zip(lines, lines[1:]):
        if current.linetype == LineType.BLANK and \
                before.linetype == LineType.BLANK:
            yield (current.lineno, 'consecutive blank lines')

    # Pass 2: blanks hugging the edges of a node body.
    final = len(lines) - 1
    for idx, current in enumerate(lines):
        if current.linetype != LineType.BLANK:
            continue
        if idx > 0 and lines[idx - 1].linetype == LineType.NODE_OPEN:
            yield (current.lineno, 'blank line at start of node body')
        if idx < final and \
                lines[idx + 1].linetype == LineType.NODE_CLOSE:
            yield (current.lineno, 'blank line at end of node body')
| |
| |
def _walk_bodies(lines):
    """Yield, for each node body, the list of its immediate-child
    NODE_OPEN lines.

    A body's list is yielded when the body is closed; bodies still
    open at the end of input (including the top level) are yielded
    last, innermost first. Reference nodes (&label { ... }) appear in
    the lists like any other NODE_OPEN line; callers filter on
    node_name / node_addr / ref_name as needed.

    A NODE_CLOSE line may close several nodes at once ("}; };"); one
    body is closed per counted '}' (dl.closures) so that later
    siblings are attributed to the correct parent.
    """
    body_stack = [[]]
    for dl in lines:
        if dl.linetype == LineType.NODE_OPEN:
            body_stack[-1].append(dl)
            body_stack.append([])
        elif dl.linetype == LineType.NODE_CLOSE:
            for _ in range(dl.closures):
                if len(body_stack) <= 1:
                    # Unbalanced; ignore to avoid crashing on
                    # malformed input
                    break
                yield body_stack.pop()
    while body_stack:
        yield body_stack.pop()
| |
| |
| def _natural_sort_key(s): |
| """Split a string into a tuple of (kind, value) pairs that compares |
| numeric runs as ints, so 'foo10' sorts after 'foo2'.""" |
| parts = [] |
| for part in re.split(r'(\d+)', s): |
| if part.isdigit(): |
| parts.append((0, int(part))) |
| else: |
| parts.append((1, part)) |
| return tuple(parts) |
| |
| |
def check_child_address_order(ctx):
    """Addressed siblings (foo@N) must appear in ascending address
    order within their parent node body.

    Each comma-separated cell of the unit address is parsed as
    base-16; siblings are compared cell-wise against the previous
    addressed sibling. Children without an address, or with one that
    does not parse, take no part in the comparison.
    """
    def cells_of(addr):
        try:
            return tuple(int(cell, 16) for cell in addr.split(','))
        except ValueError:
            return None

    for siblings in _walk_bodies(ctx.lines):
        previous = None
        for node in siblings:
            if node.node_addr is None:
                continue
            cells = cells_of(node.node_addr)
            if cells is None:
                continue
            if previous is not None and cells < previous:
                yield (node.lineno,
                       'child node @%s out of address order' %
                       node.node_addr)
            previous = cells
| |
| |
def check_child_name_order(ctx):
    """Unaddressed siblings must appear in natural-sort order by node
    name within their parent node body.

    Children with a unit address are left to
    check_child_address_order; children without a parsed name, the
    root node ('/') and reference nodes (&label { ... }) are skipped.
    Each remaining child is compared with the previous remaining one.
    """
    for siblings in _walk_bodies(ctx.lines):
        last_key = None
        for node in siblings:
            if node.node_addr is not None:
                continue
            if node.node_name in (None, '/') or node.ref_name is not None:
                continue
            key = _natural_sort_key(node.node_name)
            if last_key is not None and key < last_key:
                yield (node.lineno,
                       'child node %r out of name order' % node.node_name)
            last_key = key
| |
| |
| def _property_bucket(name): |
| """Return the canonical bucket index for a property: |
| 0 compatible |
| 1 reg / reg-names |
| 2 ranges |
| 3 standard properties (no vendor comma in #-stripped name) |
| 4 vendor-specific properties |
| 5 status |
| Plus a sub-key inside the bucket for fixed slots (compatible, reg, |
| reg-names, ranges, status). 'standard' and 'vendor' return None for |
| the sub-key, signalling that the within-bucket key is computed by |
| the pairing rules.""" |
| stripped = name.lstrip('#') |
| if name == 'compatible': |
| return (0, 0) |
| if name == 'reg': |
| return (1, 0) |
| if name == 'reg-names': |
| return (1, 1) |
| if name == 'ranges': |
| return (2, 0) |
| if name == 'status': |
| return (5, 0) |
| return (4 if ',' in stripped else 3, None) |
| |
| |
| # Declarative pairing rules: each is a callable |
| # (name, all_names) -> anchor_name_or_None |
| # If a rule returns an anchor, the property sorts immediately after the |
| # anchor. Rules are tried in order; the first match wins. If none |
| # matches, the within-bucket key falls back to natural sort by the |
| # #-stripped name. |
| |
def _pair_pinctrl_names(name, all_names):
    """Pairing rule: pinctrl-names is anchored to the highest
    numbered pinctrl-N property of the same node.

    Returns that property's name, or None when 'name' is not
    pinctrl-names or the node has no pinctrl-N property.
    """
    if name != 'pinctrl-names':
        return None
    numbered = (n for n in all_names if re.match(r'^pinctrl-\d+$', n))
    return max(numbered, key=_natural_sort_key, default=None)
| |
| |
| def _pair_x_names(name, all_names): |
| """Generic <x>-names follows its owning property. The owner is |
| usually plural (clocks/clock-names, dmas/dma-names, |
| resets/reset-names) but occasionally singular (reg/reg-names is |
| handled by the fixed slot above; this rule catches anything else).""" |
| if not name.endswith('-names'): |
| return None |
| base = name[:-len('-names')] |
| # Try plural and singular forms. |
| if (base + 's') in all_names: |
| return base + 's' |
| if base in all_names: |
| return base |
| return None |
| |
| |
# Pairing rules in priority order: _property_sort_key() calls them in
# turn and uses the first anchor that is not None.
PAIRING_RULES = (_pair_pinctrl_names, _pair_x_names)
| |
| |
def _property_sort_key(name, all_names):
    """Sort key for a property among its node-body siblings.

    The key is (bucket, within_key, tiebreak):
      * fixed-slot properties: (bucket, (), sub_key);
      * a property paired with anchor X by one of PAIRING_RULES:
        X's natural-sort key with tiebreak 1, so it lands right
        after X;
      * anything else: its own natural-sort key with tiebreak 0.
    Leading '#' characters are ignored when computing natural-sort
    keys.
    """
    bucket, fixed_sub = _property_bucket(name)
    if fixed_sub is not None:
        return (bucket, (), fixed_sub)

    # First rule that produces an anchor wins; rules after it are not
    # consulted.
    proposals = (rule(name, all_names) for rule in PAIRING_RULES)
    anchor = next((a for a in proposals if a is not None), None)

    if anchor is None:
        return (bucket, _natural_sort_key(name.lstrip('#')), 0)
    return (bucket, _natural_sort_key(anchor.lstrip('#')), 1)
| |
| |
def check_property_order(ctx):
    """Properties within a node body must appear in canonical order.

    Order: compatible, reg(/reg-names), ranges, the standard group,
    the vendor-specific group, then status; inside the standard and
    vendor groups the pairing rules apply and everything else is
    natural-sorted by its '#'-stripped name.

    For every node opening, the named properties directly in its body
    are collected up to the closing line at the opener's depth, and
    each property is compared with the one before it.
    """
    lines = ctx.lines
    for idx, opener in enumerate(lines):
        if opener.linetype != LineType.NODE_OPEN:
            continue
        inner = opener.depth + 1

        body_props = []
        for cand in lines[idx + 1:]:
            if cand.linetype == LineType.NODE_CLOSE and \
                    cand.depth == opener.depth:
                break
            if cand.linetype == LineType.PROPERTY and \
                    cand.depth == inner and cand.prop_name is not None:
                body_props.append(cand)
        if len(body_props) < 2:
            continue

        names = [p.prop_name for p in body_props]
        keys = [_property_sort_key(n, names) for n in names]
        for pos in range(1, len(body_props)):
            if keys[pos] < keys[pos - 1]:
                current = body_props[pos]
                earlier = body_props[pos - 1]
                yield (current.lineno,
                       'property %r out of canonical order '
                       '(should sort before %r)' %
                       (current.prop_name, earlier.prop_name))
| |
| |
| def _strip_strings_and_comments(text): |
| """Remove string literals and /* */ + // comments from a single |
| line, replacing them with empty strings. Used so syntactic checks |
| (whitespace, hex case, etc.) don't false-positive on contents of |
| quoted strings or comments. An unclosed /* on the line is treated |
| as a comment running to end of line.""" |
| text = re.sub(r'"(?:[^"\\]|\\.)*"', '""', text) |
| text = re.sub(r'/\*.*?\*/', '', text) |
| text = re.sub(r'/\*.*$', '', text) |
| text = re.sub(r'//.*$', '', text) |
| return text |
| |
| |
def check_required_blank_lines(ctx):
    """A blank line must precede each child node and the 'status'
    property within a node body, except when these are the first
    substantive item in the body.

    For every NODE_OPEN line the lines that follow are scanned up to
    the matching NODE_CLOSE. Only items directly in that body are
    considered: the contents of child nodes are skipped by counting
    nested opens and closes in 'depth_inside'. Comments, preprocessor
    lines and continuation lines neither count as substantive items
    nor reset the blank-line count.
    """
    lines = ctx.lines
    for i, open_dl in enumerate(lines):
        if open_dl.linetype != LineType.NODE_OPEN:
            continue
        body_depth = open_dl.depth + 1
        prev_substantive = None    # last property/child seen in this body
        between_blanks = 0         # blank lines since prev_substantive
        depth_inside = 0           # > 0 while inside a child node's body
        for j in range(i + 1, len(lines)):
            d = lines[j]
            # The close of the node being examined ends the scan.
            if d.linetype == LineType.NODE_CLOSE and \
                    d.depth == body_depth - 1 and depth_inside == 0:
                break
            # Track depth inside nested children so we only look at
            # immediate-body items.
            if d.linetype == LineType.NODE_OPEN and \
                    d.depth >= body_depth and depth_inside > 0:
                depth_inside += 1
                continue
            # NOTE(review): one level is dropped per NODE_CLOSE line,
            # regardless of d.closures -- confirm behaviour for lines
            # that close several nodes at once ("}; };").
            if d.linetype == LineType.NODE_CLOSE and depth_inside > 0:
                depth_inside -= 1
                continue
            if depth_inside > 0:
                continue
            # Blanks before the first substantive item are not counted.
            if d.linetype == LineType.BLANK:
                if prev_substantive is not None:
                    between_blanks += 1
                continue
            if d.linetype in (LineType.COMMENT, LineType.COMMENT_START,
                              LineType.COMMENT_BODY, LineType.COMMENT_END,
                              LineType.PREPROCESSOR):
                continue
            if d.linetype == LineType.CONTINUATION:
                continue

            needs_blank = False
            if d.linetype == LineType.NODE_OPEN:
                needs_blank = True
                depth_inside = 1   # entered the child body
            elif d.linetype == LineType.PROPERTY and d.prop_name == 'status':
                needs_blank = True

            # Report only when something substantive came before and
            # no blank line separates it from this item.
            if needs_blank and prev_substantive is not None and \
                    between_blanks == 0:
                if d.linetype == LineType.NODE_OPEN:
                    yield (d.lineno,
                           'child node must be preceded by a blank line')
                else:
                    yield (d.lineno,
                           '"status" must be preceded by a blank line')

            prev_substantive = d
            between_blanks = 0
| |
| |
def check_hex_case(ctx):
    """Hex literals (0xN) must use a lowercase prefix and lowercase
    digits.

    Blank, comment and preprocessor lines are skipped; on the rest,
    string and comment contents are blanked out before scanning.
    Every offending literal on a line is reported separately.
    """
    skipped = (LineType.BLANK, LineType.COMMENT,
               LineType.COMMENT_START, LineType.COMMENT_BODY,
               LineType.COMMENT_END, LineType.PREPROCESSOR)
    for entry in ctx.lines:
        if entry.linetype in skipped:
            continue
        code = _strip_strings_and_comments(entry.raw)
        for literal in re.findall(r'\b0[xX][0-9a-fA-F]+\b', code):
            if literal[1] == 'X' or \
                    any(ch.isupper() for ch in literal[2:]):
                yield (entry.lineno,
                       'hex literal %r must be lowercase' % literal)
| |
| |
def check_unit_address_format(ctx):
    """Unit addresses must be lowercase hex without leading zeros and
    without a '0x' prefix.

    Multi-cell addresses (comma-separated) are checked cell by cell;
    a lone '0' is accepted. At most one problem is reported per node:
    the first problem of the first offending cell.
    """
    def complaint(cell):
        # Message template for the first problem with this cell, or
        # None when the cell is fine.
        if cell[:2] in ('0x', '0X'):
            return 'unit address %r must not have a "0x" prefix'
        if not re.match(r'^[0-9a-fA-F]+$', cell):
            return 'unit address %r is not valid hex'
        if any(ch in 'ABCDEF' for ch in cell):
            return 'unit address %r must be lowercase hex'
        if len(cell) > 1 and cell.startswith('0'):
            return 'unit address %r has leading zeros'
        return None

    for entry in ctx.lines:
        if entry.linetype != LineType.NODE_OPEN or \
                entry.node_addr is None:
            continue
        for cell in entry.node_addr.split(','):
            template = complaint(cell)
            if template is not None:
                yield (entry.lineno, template % entry.node_addr)
                break
| |
| |
def check_value_whitespace(ctx):
    """A <...> cell list must have no whitespace directly after '<'
    or directly before '>'.

    A property and its continuation lines are joined first, so a list
    split across lines is checked too. Pieces are joined with a
    single space, except that a piece following a trailing '<' or
    starting with '>' is glued on directly -- the line break itself
    is never counted as padding. String and comment contents are
    blanked out beforehand. At most one warning is issued per
    property.
    """
    for entry in ctx.lines:
        if entry.linetype != LineType.PROPERTY:
            continue

        pieces = [_strip_strings_and_comments(entry.raw).strip()]
        pieces.extend(
            _strip_strings_and_comments(cont.stripped).strip()
            for cont in entry.continuations)

        joined = ''
        for piece in filter(None, pieces):
            glued = (not joined or joined.endswith('<') or
                     piece.startswith('>'))
            joined += piece if glued else ' ' + piece

        if any(inner and inner != inner.strip()
               for inner in re.findall(r'<([^<>]*)>', joined)):
            yield (entry.lineno, 'extra whitespace inside <...>')
| |
| |
def check_node_close_alone(ctx):
    """The closing '};' of a node must be on its own line.

    Two situations are reported: a NODE_CLOSE line that closes more
    than one node (dl.closures > 1, e.g. "}; };"), and any other code
    line that still contains '};' once strings and comments are
    blanked out. Blank, comment and preprocessor lines are ignored.
    """
    message = 'closing brace must be on its own line'
    ignored = (LineType.BLANK, LineType.COMMENT,
               LineType.COMMENT_START, LineType.COMMENT_BODY,
               LineType.COMMENT_END, LineType.PREPROCESSOR)
    for entry in ctx.lines:
        kind = entry.linetype
        if kind == LineType.NODE_CLOSE:
            offending = entry.closures > 1
        elif kind in ignored:
            offending = False
        else:
            offending = '};' in _strip_strings_and_comments(entry.raw)
        if offending:
            yield (entry.lineno, message)
| |
| |
| def _display_col(text): |
| """Visual column width of text, with tabs expanded to the next |
| 8-column stop, matching how printf and most editors render a |
| line and the kernel-wide line length convention.""" |
| col = 0 |
| for ch in text: |
| if ch == '\t': |
| col = (col // 8 + 1) * 8 |
| else: |
| col += 1 |
| return col |
| |
| |
def check_line_length(ctx):
    """Lines must not exceed 80 display columns.

    Width is measured on the raw line by _display_col(), i.e. with
    tabs expanded to 8-column stops. Blank lines are skipped.
    """
    limit = 80
    for entry in ctx.lines:
        if entry.linetype == LineType.BLANK:
            continue
        width = _display_col(entry.raw)
        if width > limit:
            yield (entry.lineno,
                   'line exceeds 80 columns (%d)' % width)
| |
| |
def check_continuation_alignment(ctx):
    """Continuation lines of a multi-line property must start in the
    display column of the first '<' or '"' after the '=' on the
    property's first line.

    Display columns (tabs expanded) are compared so that tab-indented
    files, where continuations align with tabs plus spaces, are
    handled correctly. Properties without continuations, without '=',
    or without '<' / '"' after the '=' are skipped. The reported
    column is 1-based.
    """
    for entry in ctx.lines:
        if entry.linetype != LineType.PROPERTY or \
                not entry.continuations:
            continue
        head, sep, tail = entry.raw.partition('=')
        if not sep:
            continue
        opener = re.search(r'[<"]', tail)
        if opener is None:
            continue
        want = _display_col(head + sep + tail[:opener.start()])
        for cont in entry.continuations:
            if _display_col(cont.indent_str) != want:
                yield (cont.lineno,
                       'continuation should align to column %d '
                       '(under "<" or \\")' % (want + 1))
| |
| |
def check_unclosed_block_comment(ctx):
    """Every /* must have a matching */ within the same block.

    Tracks the line of the comment currently open: a COMMENT_START
    line, or -- when the comment was opened at the tail of a code
    line, which keeps its own line type -- the first COMMENT_BODY
    line seen with nothing open. A COMMENT_END clears it. If a
    comment is still open after the last line, one warning is issued
    at the tracked line.
    """
    pending = None
    for entry in ctx.lines:
        kind = entry.linetype
        if kind == LineType.COMMENT_END:
            pending = None
        elif kind == LineType.COMMENT_START:
            pending = entry.lineno
        elif kind == LineType.COMMENT_BODY and pending is None:
            # Orphan body line: the opening /* sat on a code line.
            pending = entry.lineno
    if pending is not None:
        yield (pending, 'unclosed /* block comment')
| |
| |
def check_unused_labels(ctx):
    """Report labels that are defined but never &-referenced.

    Labels are reported in sorted order. The line number is that of
    the first line in ctx.text containing the label followed by ':';
    line 1 is used if no such line is found.
    """
    defined, referenced = collect_labels_and_refs(ctx.text)
    for label in sorted(defined.difference(referenced)):
        where = re.search(
            r'(?m)^.*\b' + re.escape(label) + r'\s*:', ctx.text)
        if where is None:
            lineno = 1
        else:
            lineno = ctx.text.count('\n', 0, where.start()) + 1
        yield (lineno, 'label %r defined but never &-referenced' % label)
| |
| |
| # --- registry -------------------------------------------------------------- |
| |
# The rule registry. Each entry names a rule, the lowest mode that runs
# it ('relaxed' or 'strict'), a one-line description, the check
# function, and optionally the input types it applies to (all types
# when omitted).
RULES = [
    # 'relaxed' is the default; rules in this group must produce zero
    # output on a clean kernel tree (post the small prep-cleanup
    # commit at the head of this series).
    Rule('trailing-whitespace', 'relaxed',
         'no trailing whitespace on any line',
         check_trailing_whitespace),
    Rule('tab-in-dts', 'relaxed',
         'YAML examples may not contain tab characters',
         check_tab_in_dts, applies_to=('yaml',)),
    Rule('mixed-indent-chars', 'relaxed',
         'indent must not mix tabs and spaces',
         check_mixed_indent_chars),
    Rule('unclosed-block-comment', 'relaxed',
         'every /* block comment must close with */',
         check_unclosed_block_comment),

    # DTS files always use tabs; this is not negotiable per kernel
    # coding style (.dts files are real source). Relaxed mode.
    Rule('indent-unit-dts', 'relaxed',
         'DTS files: 1 tab per nesting level',
         check_indent_unit_dts,
         applies_to=('dts', 'dtsi', 'dtso')),

    # 'strict' rules are opt-in (e.g. for new submissions via
    # checkpatch.pl in a follow-up series). They flag many existing
    # files and can be promoted to relaxed once those are cleaned up.
    #
    # Note: 'indent-unit' is registered as 'strict' even though its
    # check function is named check_indent_unit_relaxed; the name
    # refers to the looser 2-or-4-space requirement, as opposed to
    # the 4-space-only 'indent-unit-strict' rule.
    Rule('indent-unit', 'strict',
         'YAML: 2 or 4 spaces per level',
         check_indent_unit_relaxed, applies_to=('yaml',)),
    Rule('indent-unit-strict', 'strict',
         'YAML: must be 4 spaces per level',
         check_indent_unit_strict, applies_to=('yaml',)),
    Rule('indent-consistent', 'strict',
         'every line indented at depth * unit',
         check_indent_consistent),
    Rule('blank-lines', 'strict',
         'no consecutive blanks; no blanks at node body edges',
         check_blank_lines),
    Rule('child-address-order', 'strict',
         'addressed siblings must be in ascending address order',
         check_child_address_order),
    Rule('child-name-order', 'strict',
         'unaddressed siblings must be in natural-sort name order',
         check_child_name_order),
    Rule('property-order', 'strict',
         'canonical bucket + pairing + natural-sort order of properties',
         check_property_order),
    Rule('required-blank-lines', 'strict',
         'blank line before child nodes and before "status"',
         check_required_blank_lines),
    Rule('hex-case', 'strict',
         'hex literals must be lowercase',
         check_hex_case),
    Rule('unit-address-format', 'strict',
         'unit addresses must be lowercase hex without leading zeros',
         check_unit_address_format),
    Rule('value-whitespace', 'strict',
         'no whitespace directly inside <...> brackets',
         check_value_whitespace),
    Rule('node-close-alone', 'strict',
         'closing brace must be on its own line',
         check_node_close_alone),
    Rule('line-length', 'strict',
         'lines must not exceed 80 columns',
         check_line_length),
    Rule('continuation-alignment', 'strict',
         'multi-line property continuations align under "<" or "\\""',
         check_continuation_alignment),
    # Labels in .dtsi/.dtso files are exported to the files that
    # include them, so the rule is limited to YAML examples and .dts.
    Rule('unused-labels', 'strict',
         'every label must be &-referenced in the same example/file '
         '(skipped for .dtsi/.dtso since labels there are exported)',
         check_unused_labels, applies_to=('yaml', 'dts')),
]
| |
| |
def select_rules(mode, input_kind):
    """Pick the registry entries enabled for *mode* and *input_kind*.

    A rule is enabled when its own mode is no stricter than the
    requested one ('relaxed' ranks below 'strict') and *input_kind* is
    listed in the rule's applies_to. Registry order is preserved.

    Returns:
        A new list of the matching Rule entries from RULES.
    """
    strictness = {'relaxed': 0, 'strict': 1}
    return [
        rule for rule in RULES
        if strictness[rule.mode] <= strictness[mode]
        and input_kind in rule.applies_to
    ]
| |
| |
| # --------------------------------------------------------------------------- |
| # Block runner |
| # --------------------------------------------------------------------------- |
| |
def check_block(text, mode, indent_kind, input_type):
    """Apply every selected rule to one block of DTS text.

    The text is classified into lines once, wrapped in a Ctx together
    with the mode and indent kind, and handed to each rule chosen by
    select_rules(mode, input_type).

    Returns:
        A list of (lineno, rule_name, message) tuples ordered by line
        number, then by rule name.
    """
    ctx = Ctx(classify_lines(text), text, mode, indent_kind)
    findings = [
        (lineno, rule.name, msg)
        for rule in select_rules(mode, input_type)
        for lineno, msg in rule.check(ctx)
    ]
    # Order by (lineno, rule name); the message does not take part.
    findings.sort(key=lambda finding: finding[:2])
    return findings
| |
| |
| # --------------------------------------------------------------------------- |
| # Input drivers (YAML examples vs raw DTS) |
| # --------------------------------------------------------------------------- |
| |
def _yaml_loader():
    """Return a new ruamel.yaml.YAML instance built with no arguments."""
    loader = ruamel.yaml.YAML()
    return loader
| |
| |
def iter_yaml_examples(filepath):
    """Yield one tuple per DTS example found in a YAML binding file.

    Args:
        filepath: path of the YAML file to load (read as UTF-8).

    Yields:
        (example_text, base_lineno_in_file, example_index) tuples, where
        example_index is the position in the top-level 'examples' list
        and base_lineno_in_file is the 1-based file line of the
        example's first text line (1 when the loader provides no
        position data for the entry).

    Nothing is yielded when the file cannot be read or parsed (the
    error is printed to stderr), when the document is not a mapping, or
    when 'examples' is missing or is not a list. Non-string list
    entries are skipped.
    """
    yaml = _yaml_loader()
    try:
        with open(filepath, encoding='utf-8') as f:
            data = yaml.load(f)
    except Exception as e:
        # Deliberate best-effort boundary: any read or parse failure is
        # reported and the file is skipped rather than aborting the run.
        print('%s: error loading YAML: %s' % (filepath, e),
              file=sys.stderr)
        return
    if not isinstance(data, dict):
        return
    examples = data.get('examples')
    # Only a real sequence is a list of examples. A scalar string (e.g.
    # 'examples: |' written without the '- ' list marker) or a mapping
    # is also iterable; iterating it would yield single characters or
    # keys as bogus "examples", so reject anything that is not a list.
    if not isinstance(examples, list):
        return
    for i, ex in enumerate(examples):
        if not isinstance(ex, str):
            continue
        try:
            # lc.item(i)[0] is the 0-based line of the list entry; +1
            # makes it 1-based and +1 more skips the '- |' marker line
            # to reach the first line of the example text.
            base = examples.lc.item(i)[0] + 2
        except Exception:
            # No usable position data: fall back to line 1.
            base = 1
        yield (str(ex), base, i)
| |
| |
def iter_dts_file(filepath):
    """Yield the entire DTS source file as one block.

    Yields:
        A single (text, 1, None) tuple: the file contents decoded as
        UTF-8, base line 1, and no example index.

    If the file cannot be read, the error is printed to stderr and
    nothing is yielded.
    """
    try:
        with open(filepath, encoding='utf-8') as src:
            contents = src.read()
    except Exception as err:
        print('%s: error reading: %s' % (filepath, err), file=sys.stderr)
    else:
        yield (contents, 1, None)
| |
| |
| # --------------------------------------------------------------------------- |
| # Top-level processing |
| # --------------------------------------------------------------------------- |
| |
def input_kind(filepath):
    """Classify *filepath* by its file-name suffix, ignoring case.

    Returns:
        'yaml' for .yaml/.yml, 'dts' for .dts, 'dtsi' for .dtsi,
        'dtso' for .dtso, and None for anything else.
    """
    lowered = filepath.lower()
    suffix_kinds = (
        ('.yaml', 'yaml'),
        ('.yml', 'yaml'),
        ('.dts', 'dts'),
        ('.dtsi', 'dtsi'),
        ('.dtso', 'dtso'),
    )
    for suffix, kind in suffix_kinds:
        if lowered.endswith(suffix):
            return kind
    return None
| |
| |
# All input types that use tab indentation and follow DTS coding style.
# collect_findings() tests membership here to choose the 'tab' indent
# kind and the whole-file input driver.
DTS_FAMILY = ('dts', 'dtsi', 'dtso')
| |
| |
def collect_findings(filepath, mode):
    """Check one input file and format its findings for output.

    YAML files are checked example by example with 'spaces' as the
    indent kind; DTS-family files are checked as a single block with
    'tab' as the indent kind.

    Returns:
        A (lines, count) pair. lines holds one formatted string per
        finding, 'path:line:[ example N] [rule] message', with the line
        number translated to a position in the file; count is
        len(lines). For an unrecognised file type, lines holds a single
        "unknown file type, skipping" notice and count is 0.
    """
    kind = input_kind(filepath)
    if kind == 'yaml':
        indent_kind, blocks = 'spaces', iter_yaml_examples(filepath)
    elif kind in DTS_FAMILY:
        indent_kind, blocks = 'tab', iter_dts_file(filepath)
    else:
        return (['%s: unknown file type, skipping' % filepath], 0)

    report = []
    for text, base, idx in blocks:
        # The example tag is the same for every finding in this block.
        ex_tag = ' example %d' % idx if idx is not None else ''
        for lineno, rule, msg in check_block(text, mode, indent_kind, kind):
            # Block-relative line -> file line (both 1-based).
            report.append('%s:%d:%s [%s] %s' %
                          (filepath, base + lineno - 1, ex_tag, rule, msg))
    return (report, len(report))
| |
| |
def _worker(args):
    """Run collect_findings() on one (filepath, mode) work item.

    Entry point for ProcessPoolExecutor.map(). It is defined at module
    top level so that it is picklable on every platform.
    """
    path, rule_mode = args
    return collect_findings(path, rule_mode)
| |
| |
def main():
    """Command-line entry point.

    Parses the arguments, handles --list-rules, then checks every input
    file, either in-process or through a ProcessPoolExecutor, printing
    each finding line to stderr in input order.

    Returns:
        0 after --list-rules or when no findings were counted, 1 when at
        least one finding was counted. With no input files,
        ArgumentParser.error() is called instead of returning.
    """
    import os
    ap = argparse.ArgumentParser(
        description='Check DTS coding style on YAML examples and '
                    '.dts/.dtsi/.dtso files.',
        fromfile_prefix_chars='@')
    ap.add_argument('--mode', choices=('relaxed', 'strict'),
                    default='relaxed',
                    help='which rule set to apply (default: relaxed)')
    ap.add_argument('-j', '--jobs', type=int, default=0,
                    metavar='N',
                    help='run N workers in parallel (default: respect '
                         'the make jobserver via $PARALLELISM, otherwise '
                         'os.cpu_count(); use 1 to disable multiprocessing)')
    ap.add_argument('--list-rules', action='store_true',
                    help='print all rules with their mode and exit')
    ap.add_argument('files', nargs='*', metavar='file',
                    help='YAML binding files or .dts/.dtsi/.dtso files; '
                         'use @argfile to read paths from a file')
    args = ap.parse_args()

    if args.list_rules:
        for r in RULES:
            applies = ','.join(r.applies_to)
            print('%-22s %-7s [%s] %s' %
                  (r.name, r.mode, applies, r.description))
        return 0

    if not args.files:
        ap.error('no input files')

    if args.jobs > 0:
        jobs = args.jobs
    else:
        # When invoked under scripts/jobserver-exec, $PARALLELISM
        # holds the slot count make has reserved for us; this lets
        # `make -j N dt_binding_check` constrain our worker pool to N.
        try:
            jobs = int(os.environ['PARALLELISM'])
        except (KeyError, ValueError):
            jobs = 0
        # A missing, malformed, zero or negative value is unusable: it
        # would divide by zero in the chunk computation below and
        # ProcessPoolExecutor rejects max_workers <= 0. Fall back to
        # the CPU count in all of those cases.
        if jobs < 1:
            jobs = os.cpu_count() or 1

    def report(results):
        """Print every finding line to stderr; return the summed count."""
        total = 0
        for lines, n in results:
            for line in lines:
                print(line, file=sys.stderr)
            total += n
        return total

    # Single-process path: keep import surface small for tests and
    # easy debugging.
    if jobs == 1 or len(args.files) == 1:
        total = report(collect_findings(f, args.mode) for f in args.files)
        return 1 if total else 0

    # Multi-process path. ex.map preserves input order so output is
    # deterministic across runs.
    from concurrent.futures import ProcessPoolExecutor
    work = [(f, args.mode) for f in args.files]
    # Aim for roughly eight chunks per worker, never below one item.
    chunk = max(1, len(work) // (jobs * 8))
    with ProcessPoolExecutor(max_workers=jobs) as ex:
        total = report(ex.map(_worker, work, chunksize=chunk))
    return 1 if total else 0
| |
| |
# Script entry point: main()'s return value becomes the exit status.
if __name__ == '__main__':
    sys.exit(main())