blob: 2d5723d41ea391e1e1bb070230c543f639726aca [file]
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# Check DTS coding style on YAML binding examples and on
# .dts/.dtsi/.dtso source files. Enforces rules from
# Documentation/devicetree/bindings/dts-coding-style.rst.
#
# Two modes:
# --mode=relaxed (default)
# Only rules that produce zero warnings on the current tree.
# Suitable for dt_binding_check.
# --mode=strict
# All rules. Required for new submissions.
#
# Two input types (auto-detected by file extension):
# *.yaml -- DT binding; check each example block
# *.dts/*.dtsi/*.dtso -- DTS source; whole file is one block
#
# Rules are declared in a registry (see RULES below); each rule is
# tagged with the lowest mode that runs it. Promoting a rule from
# 'strict' to 'relaxed' is a one-line change.
import argparse
import re
import sys
from enum import Enum, auto
import ruamel.yaml
# ---------------------------------------------------------------------------
# Line classification
# ---------------------------------------------------------------------------
class LineType(Enum):
    """Category assigned by classify_lines() to each physical line."""

    # Lines that carry no DTS structure of their own.
    BLANK = auto()
    COMMENT = auto()        # whole-line comment: // ... or one-line /* ... */
    COMMENT_START = auto()  # comment-only line whose /* is left open
    COMMENT_BODY = auto()   # line inside a multi-line comment
    COMMENT_END = auto()    # line containing the closing */
    PREPROCESSOR = auto()   # #include / #define / #ifdef / ...
    # Structural lines.
    NODE_OPEN = auto()      # "something {" (label, name, address optional)
    NODE_CLOSE = auto()     # "};"
    PROPERTY = auto()       # "name = value;" or "name;"
    CONTINUATION = auto()   # follow-on line of a multi-line property
# Matches the keyword of a C preprocessor directive at the start of a
# leading-stripped line. DTS property names such as "#address-cells"
# also begin with '#', so only the directive names listed here count.
re_cpp_directive = re.compile(
    r'^#\s*(include|define|undef|ifdef|ifndef|if|else|elif|endif|'
    r'pragma|error|warning)\b')
# label: name@addr { -- label and addr optional; name can be "/"
# Per the DT spec a node name may start with a digit (e.g. 1wire@...).
# The address part is captured loosely (any non-space, non-brace run) so
# malformed addresses (e.g. memory@0x1000) still reach
# check_unit_address_format() instead of silently bypassing the check.
# Groups: 1 = label, 2 = node name, 3 = unit address.
re_node_header = re.compile(
    r'^(?:([a-zA-Z_][a-zA-Z0-9_]*):\s*)?'
    r'([a-zA-Z0-9][a-zA-Z0-9,._+-]*|/)'
    r'(?:@([^\s{]+))?'
    r'\s*\{$')
# Reference node header: "&label {". Group 1 is the referenced label.
re_ref_node = re.compile(
    r'^&([a-zA-Z_][a-zA-Z0-9_]*)\s*\{$')
def is_preprocessor(stripped):
    """Return True when the leading-stripped line starts with a C
    preprocessor directive rather than a '#'-prefixed DTS property."""
    return bool(re_cpp_directive.match(stripped))
class DtsLine:
    """One classified source line plus the details parsed out of it."""

    __slots__ = ('lineno', 'raw', 'linetype', 'indent_str', 'stripped',
                 'prop_name', 'continuations',
                 'node_name', 'node_addr', 'label', 'ref_name', 'depth',
                 'closures')

    def __init__(self, lineno, raw, linetype, indent_str, stripped):
        # Values supplied by the classifier.
        self.lineno = lineno            # 1-based within the block
        self.raw = raw
        self.linetype = linetype
        self.indent_str = indent_str    # leading whitespace, verbatim
        self.stripped = stripped
        # Parsed details; they stay None unless a parser fills them in.
        for field in ('prop_name', 'node_name', 'node_addr', 'label',
                      'ref_name'):
            setattr(self, field, None)
        self.continuations = []
        self.depth = 0                  # set by classify_lines
        self.closures = 1               # number of '}' on a NODE_CLOSE line
def _split_code(text):
"""Return (code, opens_block) for a leading-stripped line: the
code portion with // and /* */ comments removed (string literals
kept verbatim), and whether a /* */ block comment is left open.
The code portion is right-stripped so the endswith() checks in
classify_lines see code only, not a trailing comment or blanks."""
out = []
i = 0
n = len(text)
while i < n:
c = text[i]
if c == '"':
j = i + 1
while j < n:
if text[j] == '\\':
j += 2
continue
if text[j] == '"':
j += 1
break
j += 1
out.append(text[i:j])
i = j
continue
if c == '/' and i + 1 < n and text[i + 1] == '/':
break
if c == '/' and i + 1 < n and text[i + 1] == '*':
end = text.find('*/', i + 2)
if end < 0:
return (''.join(out).rstrip(), True)
i = end + 2
continue
out.append(c)
i += 1
return (''.join(out).rstrip(), False)
# Matches a code line made up solely of one or more '}' closures, each
# optionally followed by ';' (e.g. "}", "};", "}; };").
re_only_closures = re.compile(r'(?:\}\s*;?\s*)+$')
def classify_lines(text):
    """Return a list of DtsLine. Tracks { } depth and groups
    continuation lines onto their leading PROPERTY line.

    The text is split on '\\n' and each line is classified in this
    priority order: preprocessor-macro continuation, blank, inside a
    block comment, preprocessor directive, comment-only, property
    continuation, node close, node open, property.

    CONTINUATION lines that follow a PROPERTY are moved onto that
    property's 'continuations' list and are NOT present in the returned
    list; every other line appears in source order.
    """
    out = []
    in_block_comment = False
    in_cpp_macro = False
    prev_complete = True
    depth = 0
    # Split preserving the indent string verbatim
    re_lead = re.compile(r'^([ \t]*)(.*)$')
    for i, raw in enumerate(text.split('\n'), start=1):
        m = re_lead.match(raw)
        indent_str = m.group(1)
        stripped = m.group(2)
        # Continuation of a multi-line C preprocessor directive: the
        # previous PREPROCESSOR line ended with a '\\' line splice, so
        # this line is part of the same macro. Treat it as
        # PREPROCESSOR until the splice chain ends (no trailing '\\'
        # or a blank line).
        if in_cpp_macro:
            dl = DtsLine(i, raw, LineType.PREPROCESSOR,
                         indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            in_cpp_macro = (bool(stripped) and
                            stripped.rstrip().endswith('\\'))
            continue
        # Blank lines are recognised before the block-comment test, so
        # an empty line inside a /* */ comment is still BLANK.
        if not stripped:
            dl = DtsLine(i, raw, LineType.BLANK, '', '')
            dl.depth = depth
            out.append(dl)
            continue
        if in_block_comment:
            ltype = (LineType.COMMENT_END if '*/' in stripped
                     else LineType.COMMENT_BODY)
            if ltype == LineType.COMMENT_END:
                in_block_comment = False
            dl = DtsLine(i, raw, ltype, indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            continue
        if stripped.startswith('#') and is_preprocessor(stripped):
            dl = DtsLine(i, raw, LineType.PREPROCESSOR,
                         indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            prev_complete = True
            in_cpp_macro = stripped.rstrip().endswith('\\')
            continue
        # Strip comments first so all later structural checks see code
        # only. An unclosed /* sets in_block_comment for the next line.
        code, opens_block = _split_code(stripped)
        if opens_block:
            in_block_comment = True
        # Pure-comment line: nothing left after stripping. Classify as
        # COMMENT_START (carries to next line) or COMMENT, and skip the
        # structural classification entirely.
        if not code:
            ltype = LineType.COMMENT_START if opens_block else LineType.COMMENT
            dl = DtsLine(i, raw, ltype, indent_str, stripped)
            dl.depth = depth
            out.append(dl)
            continue
        # The previous code line did not end its statement, so this line
        # continues it. Note 'stripped' is set to the comment-free code.
        if not prev_complete:
            dl = DtsLine(i, raw, LineType.CONTINUATION, indent_str, code)
            dl.depth = depth
            out.append(dl)
            prev_complete = (code.endswith(';') or
                             code.endswith('{') or
                             code.endswith('};'))
            continue
        # NODE_CLOSE: the canonical form is "}" or "};" alone. A line
        # that is nothing but closures (e.g. "}; };") is still treated
        # as NODE_CLOSE for depth tracking, but the multi-closure case
        # is flagged separately by check_node_close_alone via
        # dl.closures.
        if re_only_closures.match(code):
            closures = code.count('}')
            # Clamp at zero so unbalanced input cannot go negative.
            depth = max(depth - closures, 0)
            dl = DtsLine(i, raw, LineType.NODE_CLOSE, indent_str, code)
            dl.depth = depth
            dl.closures = closures
            out.append(dl)
            prev_complete = True
            continue
        if code.endswith('{'):
            dl = DtsLine(i, raw, LineType.NODE_OPEN, indent_str, code)
            parse_node_header(dl)
            # The opening line carries the depth of its parent body.
            dl.depth = depth
            out.append(dl)
            depth += 1
            prev_complete = True
            continue
        # Property (or first line of a multi-line property).
        dl = DtsLine(i, raw, LineType.PROPERTY, indent_str, code)
        parse_property_name(dl)
        dl.depth = depth
        out.append(dl)
        prev_complete = code.endswith(';')
    # Group continuation lines onto their leading PROPERTY.
    last_prop = None
    grouped = []
    for dl in out:
        if dl.linetype == LineType.CONTINUATION and last_prop is not None:
            last_prop.continuations.append(dl)
            continue
        if dl.linetype == LineType.PROPERTY:
            last_prop = dl
        # Blank and comment lines leave last_prop alone so they may sit
        # between a property and its continuation; anything else ends it.
        elif dl.linetype != LineType.BLANK and \
                dl.linetype not in (LineType.COMMENT, LineType.COMMENT_BODY,
                                    LineType.COMMENT_END,
                                    LineType.COMMENT_START):
            last_prop = None
        grouped.append(dl)
    return grouped
def parse_node_header(dl):
    """Fill in the header fields of a NODE_OPEN line.

    A regular header sets dl.label, dl.node_name and dl.node_addr (any
    of which may be None); a "&label {" header sets dl.ref_name only.
    Lines matching neither form are left untouched.
    """
    header = re_node_header.match(dl.stripped)
    if header is not None:
        dl.label, dl.node_name, dl.node_addr = header.groups()
        return
    ref = re_ref_node.match(dl.stripped)
    if ref is not None:
        dl.ref_name = ref.group(1)
def parse_property_name(dl):
    """Set dl.prop_name when the line starts with a property name that is
    followed by '=' or ';'; otherwise leave it unchanged."""
    found = re.match(r'^([a-zA-Z0-9#][a-zA-Z0-9,._+#-]*)\s*[=;]', dl.stripped)
    if found is not None:
        dl.prop_name = found.group(1)
def collect_labels_and_refs(text):
    """Return (defined_labels, referenced_labels) found anywhere outside
    comments and string literals.

    Both results are sets of label names. Labels named fake_intc*
    (injected by dt-extract-example) are left out of the defined set.
    """
    def _neutralise(match):
        # Strings collapse to an empty literal; comments vanish.
        return '""' if match.group(0).startswith('"') else ''

    # Strings and comments are removed in ONE left-to-right pass so that
    # whichever construct starts first wins. Stripping them in separate
    # passes lets a "//" or "/*" inside a string literal delete real
    # code (and with it labels and &-references). The (?s:...) scope
    # lets only block comments span newlines.
    stripped = re.sub(
        r'"(?:[^"\\]|\\.)*"|(?s:/\*.*?\*/)|//[^\n]*',
        _neutralise, text)
    defined = set()
    referenced = set()
    # A label precedes a node header; the next non-space token may start
    # with a letter (foo, &ref), a digit (1wire), or '/' (root node).
    for m in re.finditer(
            r'(?:^|[\s{])([a-zA-Z_][a-zA-Z0-9_]*):\s*[a-zA-Z0-9/&]',
            stripped):
        name = m.group(1)
        if not name.startswith('fake_intc'):
            defined.add(name)
    for m in re.finditer(r'&([a-zA-Z_][a-zA-Z0-9_]*)', stripped):
        referenced.add(m.group(1))
    return defined, referenced
# ---------------------------------------------------------------------------
# Rule registry
# ---------------------------------------------------------------------------
class Ctx:
    """Bundle of inputs handed to every rule check: the classified lines,
    the raw block text, the active mode and the indent kind."""

    def __init__(self, lines, text, mode, indent_kind):
        self.text = text
        self.lines = lines
        self.indent_kind = indent_kind  # 'spaces' or 'tab'
        self.mode = mode                # 'relaxed' or 'strict'
class Rule:
    """Registry entry tying a rule name to its check function."""

    __slots__ = ('name', 'mode', 'description', 'check', 'applies_to')

    def __init__(self, name, mode, description, check,
                 applies_to=('yaml', 'dts', 'dtsi', 'dtso')):
        self.name = name
        self.description = description
        self.check = check
        self.mode = mode              # lowest mode that runs the rule:
                                      # 'relaxed' or 'strict'
        self.applies_to = applies_to  # input types the rule covers
# --- individual rule check functions --------------------------------------
def check_trailing_whitespace(ctx):
    """Yield (lineno, message) for every line that ends in whitespace.

    classify_lines() moves the continuation lines of a multi-line
    property off the main list and onto dl.continuations, so they are
    walked here explicitly; otherwise trailing whitespace on them would
    never be reported.
    """
    for dl in ctx.lines:
        for line in (dl, *dl.continuations):
            if line.raw != line.raw.rstrip():
                yield (line.lineno, 'trailing whitespace')
def check_tab_in_dts(ctx):
    """Flag literal tab characters when the block is space-indented.

    Only active when ctx.indent_kind is 'spaces' (YAML examples), where
    both indent and content must use spaces. PREPROCESSOR lines are
    exempt, so tabs inside a #define value are tolerated, and BLANK
    lines are ignored. Tab-indented DTS sources are not checked.
    """
    if ctx.indent_kind == 'spaces':
        exempt = (LineType.PREPROCESSOR, LineType.BLANK)
        for dl in ctx.lines:
            if dl.linetype not in exempt and '\t' in dl.raw:
                yield (dl.lineno, 'tab character not allowed in DTS example')
def check_mixed_indent_chars(ctx):
    """Flag lines whose leading whitespace contains both a space and a
    tab. Unindented lines and PREPROCESSOR lines are not examined."""
    for dl in ctx.lines:
        lead = dl.indent_str
        if not lead or dl.linetype == LineType.PREPROCESSOR:
            continue
        if ' ' in lead and '\t' in lead:
            yield (dl.lineno, 'mixed tabs and spaces in indent')
def detect_indent_unit(ctx):
    """Return the indent string of the first usable depth-1 line.

    Blank, preprocessor, comment-body and comment-end lines are passed
    over, as are lines with no indent at all. The result is returned
    verbatim: a canonical unit (one tab, two spaces, four spaces) or any
    other string, which the check_indent_unit_* rules then report.
    Returns None when no depth-1 line qualifies.
    """
    ignored = (LineType.BLANK, LineType.PREPROCESSOR,
               LineType.COMMENT_BODY, LineType.COMMENT_END)
    for dl in ctx.lines:
        if dl.depth == 1 and dl.linetype not in ignored and dl.indent_str:
            return dl.indent_str
    return None
def check_indent_unit_relaxed(ctx):
    """YAML examples: 2 or 4 spaces. Never tabs or other widths.

    Yields a single finding on line 1 when the detected indent unit is
    neither of the accepted widths; yields nothing when no unit could be
    detected.
    """
    unit = detect_indent_unit(ctx)
    if unit is None:
        return
    # Widths are spelled as multiplications: two literals that differ
    # only in their number of spaces are indistinguishable once
    # whitespace is collapsed, which would reject 2- and 4-space indents.
    if unit not in (' ' * 2, ' ' * 4):
        yield (1, 'indent unit must be 2 or 4 spaces, got %r' % unit)
def check_indent_unit_dts(ctx):
    """DTS sources: one tab per nesting level.

    Yields one finding, reported on line 1, when an indent unit was
    detected and it is anything other than a single tab.
    """
    unit = detect_indent_unit(ctx)
    if unit is not None and unit != '\t':
        yield (1, 'indent unit must be 1 tab in DTS, got %r' % unit)
def check_indent_unit_strict(ctx):
    """Space-indented input (YAML): the unit must be exactly 4 spaces.

    Yields one finding on line 1 otherwise. Nothing is checked when the
    indent kind is not 'spaces' or no unit could be detected.
    """
    unit = detect_indent_unit(ctx)
    if unit is None:
        return
    # The width is spelled as a multiplication so it cannot be confused
    # with any other run of spaces if whitespace is ever collapsed.
    if ctx.indent_kind == 'spaces' and unit != ' ' * 4:
        yield (1, 'indent unit must be 4 spaces in strict mode, '
                  'got %r' % unit)
def check_indent_consistent(ctx):
    """Every indented line must be indented by exactly depth * unit.

    The unit comes from detect_indent_unit(). When it is not canonical
    for the indent kind (2 or 4 spaces for 'spaces', one tab otherwise)
    nothing is reported here; the check_indent_unit_* rules own that
    diagnosis.
    """
    unit = detect_indent_unit(ctx)
    if unit is None:
        return
    # Space widths are spelled as multiplications so the two accepted
    # units stay distinct even if literal whitespace is collapsed.
    if ctx.indent_kind == 'spaces':
        canonical = (' ' * 2, ' ' * 4)
    else:
        canonical = ('\t',)
    if unit not in canonical:
        return  # let check_indent_unit_* report this
    skipped = (LineType.BLANK, LineType.PREPROCESSOR,
               # continuations align to '<', not to the indent unit
               LineType.CONTINUATION,
               LineType.COMMENT_BODY, LineType.COMMENT_END)
    for dl in ctx.lines:
        if dl.linetype in skipped or not dl.indent_str:
            continue
        # NODE_CLOSE lines carry the post-decrement depth, which is the
        # indent level the closing brace is expected at.
        if dl.indent_str != unit * dl.depth:
            yield (dl.lineno,
                   'indent mismatch (expected depth %d * %r)' %
                   (dl.depth, unit))
def check_blank_lines(ctx):
    """Flag runs of blank lines and blank lines hugging a node's braces.

    First pass: every blank line directly after another blank line.
    Second pass: blank lines immediately after a NODE_OPEN line or
    immediately before a NODE_CLOSE line.
    """
    lines = ctx.lines
    blank = LineType.BLANK
    for above, cur in zip(lines, lines[1:]):
        if cur.linetype == blank and above.linetype == blank:
            yield (cur.lineno, 'consecutive blank lines')
    last = len(lines) - 1
    for idx, dl in enumerate(lines):
        if dl.linetype != blank:
            continue
        if idx > 0 and lines[idx - 1].linetype == LineType.NODE_OPEN:
            yield (dl.lineno, 'blank line at start of node body')
        if idx < last and lines[idx + 1].linetype == LineType.NODE_CLOSE:
            yield (dl.lineno, 'blank line at end of node body')
def _walk_bodies(lines):
    """Yield, for each node body, the list of its immediate-child
    NODE_OPEN lines.

    A body is yielded when its NODE_CLOSE is seen; bodies still open at
    the end (including the top level) are yielded afterwards, innermost
    first. Every NODE_OPEN line is recorded, reference nodes included,
    so callers filter on node_name / node_addr / ref_name themselves.
    """
    stack = [[]]
    for dl in lines:
        kind = dl.linetype
        if kind == LineType.NODE_OPEN:
            stack[-1].append(dl)
            stack.append([])
        elif kind == LineType.NODE_CLOSE and len(stack) > 1:
            # A close with only the top level on the stack is
            # unbalanced input and is ignored rather than crashing.
            yield stack.pop()
    while stack:
        yield stack.pop()
def _natural_sort_key(s):
"""Split a string into a tuple of (kind, value) pairs that compares
numeric runs as ints, so 'foo10' sorts after 'foo2'."""
parts = []
for part in re.split(r'(\d+)', s):
if part.isdigit():
parts.append((0, int(part)))
else:
parts.append((1, part))
return tuple(parts)
def check_child_address_order(ctx):
    """Sibling nodes with unit addresses must appear in ascending order.

    Each address is read as comma-separated hexadecimal cells and
    compared as a tuple of ints. Siblings without an address, or whose
    address does not parse as hex, take no part in the comparison.
    """
    for siblings in _walk_bodies(ctx.lines):
        ordered = []
        for node in siblings:
            if node.node_addr is None:
                continue
            try:
                cells = tuple(int(cell, 16)
                              for cell in node.node_addr.split(','))
            except ValueError:
                continue
            ordered.append((cells, node))
        for (before, _), (cells, node) in zip(ordered, ordered[1:]):
            if cells < before:
                yield (node.lineno,
                       'child node @%s out of address order' %
                       node.node_addr)
def check_child_name_order(ctx):
    """Sibling nodes without a unit address must be in natural-sort order
    by node name.

    Addressed siblings are left to check_child_address_order. Nodes
    with no parsed name, the root node '/', and reference nodes
    (&label { ... }) are not compared.
    """
    for siblings in _walk_bodies(ctx.lines):
        named = [
            (_natural_sort_key(node.node_name), node)
            for node in siblings
            if node.node_addr is None
            and node.node_name not in (None, '/')
            and node.ref_name is None
        ]
        for (before, _), (key, node) in zip(named, named[1:]):
            if key < before:
                yield (node.lineno,
                       'child node %r out of name order' % node.node_name)
def _property_bucket(name):
"""Return the canonical bucket index for a property:
0 compatible
1 reg / reg-names
2 ranges
3 standard properties (no vendor comma in #-stripped name)
4 vendor-specific properties
5 status
Plus a sub-key inside the bucket for fixed slots (compatible, reg,
reg-names, ranges, status). 'standard' and 'vendor' return None for
the sub-key, signalling that the within-bucket key is computed by
the pairing rules."""
stripped = name.lstrip('#')
if name == 'compatible':
return (0, 0)
if name == 'reg':
return (1, 0)
if name == 'reg-names':
return (1, 1)
if name == 'ranges':
return (2, 0)
if name == 'status':
return (5, 0)
return (4 if ',' in stripped else 3, None)
# Declarative pairing rules: each is a callable
# (name, all_names) -> anchor_name_or_None
# If a rule returns an anchor, the property sorts immediately after the
# anchor. Rules are tried in order; the first match wins. If none
# matches, the within-bucket key falls back to natural sort by the
# #-stripped name.
def _pair_pinctrl_names(name, all_names):
"""pinctrl-names follows the highest pinctrl-N in the same node."""
if name != 'pinctrl-names':
return None
cands = [n for n in all_names if re.match(r'^pinctrl-\d+$', n)]
if not cands:
return None
return max(cands, key=_natural_sort_key)
def _pair_x_names(name, all_names):
"""Generic <x>-names follows its owning property. The owner is
usually plural (clocks/clock-names, dmas/dma-names,
resets/reset-names) but occasionally singular (reg/reg-names is
handled by the fixed slot above; this rule catches anything else)."""
if not name.endswith('-names'):
return None
base = name[:-len('-names')]
# Try plural and singular forms.
if (base + 's') in all_names:
return base + 's'
if base in all_names:
return base
return None
# Pairing rules in priority order; _property_sort_key() uses the first
# rule that returns an anchor.
PAIRING_RULES = (_pair_pinctrl_names, _pair_x_names)
def _property_sort_key(name, all_names):
    """Return the (bucket, within_key, tiebreak) sort key of a property
    among the properties of its node.

    Fixed-slot properties use an empty within_key and their slot number
    as tiebreak. A property that a pairing rule attaches to an anchor
    borrows the anchor's natural-sort key with tiebreak 1, placing it
    right after the anchor. Everything else uses its own natural-sort
    key (leading '#' removed) with tiebreak 0.
    """
    bucket, slot = _property_bucket(name)
    if slot is not None:
        return (bucket, (), slot)
    owner, tiebreak = name, 0
    for rule in PAIRING_RULES:
        anchor = rule(name, all_names)
        if anchor is not None:
            owner, tiebreak = anchor, 1
            break
    return (bucket, _natural_sort_key(owner.lstrip('#')), tiebreak)
def check_property_order(ctx):
    """Flag properties that break the canonical order inside a node.

    Canonical order is: compatible, reg (/reg-names), ranges, the
    standard group, the vendor-specific group, then status. Within the
    standard and vendor groups the pairing rules apply (for example
    <x>-names follows <x>), falling back to natural sort on the name
    with any leading '#' removed. Each property is compared with the
    one directly before it.
    """
    lines = ctx.lines
    total = len(lines)
    for start, opener in enumerate(lines):
        if opener.linetype != LineType.NODE_OPEN:
            continue
        inner = opener.depth + 1
        # Collect the node's own named properties, stopping at the
        # NODE_CLOSE that returns to the opener's depth.
        body = []
        for pos in range(start + 1, total):
            line = lines[pos]
            if line.linetype == LineType.NODE_CLOSE and \
                    line.depth == inner - 1:
                break
            if line.linetype == LineType.PROPERTY and \
                    line.depth == inner and line.prop_name is not None:
                body.append(line)
        if len(body) < 2:
            continue
        names = [prop.prop_name for prop in body]
        keys = [_property_sort_key(n, names) for n in names]
        for pos in range(1, len(body)):
            if keys[pos] < keys[pos - 1]:
                yield (body[pos].lineno,
                       'property %r out of canonical order '
                       '(should sort before %r)' %
                       (body[pos].prop_name, body[pos - 1].prop_name))
def _strip_strings_and_comments(text):
"""Remove string literals and /* */ + // comments from a single
line, replacing them with empty strings. Used so syntactic checks
(whitespace, hex case, etc.) don't false-positive on contents of
quoted strings or comments. An unclosed /* on the line is treated
as a comment running to end of line."""
text = re.sub(r'"(?:[^"\\]|\\.)*"', '""', text)
text = re.sub(r'/\*.*?\*/', '', text)
text = re.sub(r'/\*.*$', '', text)
text = re.sub(r'//.*$', '', text)
return text
def check_required_blank_lines(ctx):
    """A blank line must precede each child node and the 'status'
    property within a node body, except when these are the first
    substantive item in the body.

    Every NODE_OPEN line is taken in turn as a parent and its body is
    scanned up to the matching NODE_CLOSE. Lines belonging to nested
    children are skipped. Yields (lineno, message) tuples.
    """
    lines = ctx.lines
    for i, open_dl in enumerate(lines):
        if open_dl.linetype != LineType.NODE_OPEN:
            continue
        body_depth = open_dl.depth + 1
        prev_substantive = None  # last property/child seen in this body
        between_blanks = 0       # blank lines seen since prev_substantive
        depth_inside = 0         # >0 while inside a child node's body
        for j in range(i + 1, len(lines)):
            d = lines[j]
            # The parent's own closing brace ends the scan.
            if d.linetype == LineType.NODE_CLOSE and \
                    d.depth == body_depth - 1 and depth_inside == 0:
                break
            # Track depth inside nested children so we only look at
            # immediate-body items.
            if d.linetype == LineType.NODE_OPEN and \
                    d.depth >= body_depth and depth_inside > 0:
                depth_inside += 1
                continue
            if d.linetype == LineType.NODE_CLOSE and depth_inside > 0:
                depth_inside -= 1
                continue
            if depth_inside > 0:
                continue
            if d.linetype == LineType.BLANK:
                # Blanks before the first substantive item do not count.
                if prev_substantive is not None:
                    between_blanks += 1
                continue
            # Comments and preprocessor lines neither need nor reset the
            # blank-line requirement.
            if d.linetype in (LineType.COMMENT, LineType.COMMENT_START,
                              LineType.COMMENT_BODY, LineType.COMMENT_END,
                              LineType.PREPROCESSOR):
                continue
            if d.linetype == LineType.CONTINUATION:
                continue
            needs_blank = False
            if d.linetype == LineType.NODE_OPEN:
                needs_blank = True
                depth_inside = 1  # entered the child body
            elif d.linetype == LineType.PROPERTY and d.prop_name == 'status':
                needs_blank = True
            if needs_blank and prev_substantive is not None and \
                    between_blanks == 0:
                if d.linetype == LineType.NODE_OPEN:
                    yield (d.lineno,
                           'child node must be preceded by a blank line')
                else:
                    yield (d.lineno,
                           '"status" must be preceded by a blank line')
            # This item becomes the reference point for the next one.
            prev_substantive = d
            between_blanks = 0
def check_hex_case(ctx):
    """Hex literals (0xN) must use lowercase digits and prefix.

    Blank, comment and preprocessor lines are not examined; string
    literals and comments on code lines are blanked out first.
    classify_lines() moves the continuation lines of a multi-line
    property onto dl.continuations, so they are walked here explicitly;
    otherwise literals on them would never be checked.
    """
    skipped = (LineType.BLANK, LineType.COMMENT,
               LineType.COMMENT_START, LineType.COMMENT_BODY,
               LineType.COMMENT_END, LineType.PREPROCESSOR)
    for dl in ctx.lines:
        if dl.linetype in skipped:
            continue
        for line in (dl, *dl.continuations):
            text = _strip_strings_and_comments(line.raw)
            for m in re.finditer(r'\b0[xX][0-9a-fA-F]+\b', text):
                lit = m.group(0)
                if lit[1] == 'X' or any(c.isupper() for c in lit[2:]):
                    yield (line.lineno,
                           'hex literal %r must be lowercase' % lit)
def check_unit_address_format(ctx):
    """Unit addresses must be lowercase hex with no '0x' prefix and no
    leading zeros.

    A comma-separated (multi-cell) address is validated cell by cell; a
    lone '0' is accepted. At most one finding is produced per node: the
    first problem of the first offending cell.
    """
    def _fault(cell):
        # Message template for the first problem found, or None.
        if cell[:2] in ('0x', '0X'):
            return 'unit address %r must not have a "0x" prefix'
        if not re.match(r'^[0-9a-fA-F]+$', cell):
            return 'unit address %r is not valid hex'
        if any(c in 'ABCDEF' for c in cell):
            return 'unit address %r must be lowercase hex'
        if len(cell) > 1 and cell.startswith('0'):
            return 'unit address %r has leading zeros'
        return None

    for dl in ctx.lines:
        if dl.linetype != LineType.NODE_OPEN or dl.node_addr is None:
            continue
        for cell in dl.node_addr.split(','):
            template = _fault(cell)
            if template is not None:
                yield (dl.lineno, template % dl.node_addr)
                break
def check_value_whitespace(ctx):
    """Inside a <...> cell list nothing may separate '<' from the first
    value or the last value from '>'.

    A property and its continuation lines are joined before checking so
    a list split across lines is covered. Pieces are joined with one
    space, except that a piece following a trailing '<' or starting
    with '>' is glued on directly, so the line break itself is not
    mistaken for padding. Strings and comments are blanked out first.
    At most one finding is reported per property.
    """
    for dl in ctx.lines:
        if dl.linetype != LineType.PROPERTY:
            continue
        pieces = [_strip_strings_and_comments(dl.raw).strip()]
        pieces.extend(_strip_strings_and_comments(cont.stripped).strip()
                      for cont in dl.continuations)
        joined = ''
        for piece in pieces:
            if not piece:
                continue
            glued = (not joined or joined.endswith('<')
                     or piece.startswith('>'))
            joined += piece if glued else ' ' + piece
        padded = any(inner and inner != inner.strip()
                     for inner in re.findall(r'<([^<>]*)>', joined))
        if padded:
            yield (dl.lineno, 'extra whitespace inside <...>')
def check_node_close_alone(ctx):
    """A node's closing '};' must sit on a line of its own.

    Two cases are reported: a NODE_CLOSE line holding more than one
    closure (e.g. "}; };"), and any other code line that still contains
    '};' once strings and comments are blanked out. Blank, comment and
    preprocessor lines are not examined.
    """
    message = 'closing brace must be on its own line'
    not_code = (LineType.BLANK, LineType.COMMENT,
                LineType.COMMENT_START, LineType.COMMENT_BODY,
                LineType.COMMENT_END, LineType.PREPROCESSOR)
    for dl in ctx.lines:
        if dl.linetype == LineType.NODE_CLOSE:
            offending = dl.closures > 1
        elif dl.linetype in not_code:
            offending = False
        else:
            offending = '};' in _strip_strings_and_comments(dl.raw)
        if offending:
            yield (dl.lineno, message)
def _display_col(text):
"""Visual column width of text, with tabs expanded to the next
8-column stop, matching how printf and most editors render a
line and the kernel-wide line length convention."""
col = 0
for ch in text:
if ch == '\t':
col = (col // 8 + 1) * 8
else:
col += 1
return col
def check_line_length(ctx):
    """Lines must not exceed 80 columns; tabs count as 8 (see
    _display_col).

    classify_lines() moves the continuation lines of a multi-line
    property onto dl.continuations, so they are measured here
    explicitly; otherwise an over-long continuation line would never be
    reported.
    """
    for dl in ctx.lines:
        if dl.linetype == LineType.BLANK:
            continue
        for line in (dl, *dl.continuations):
            cols = _display_col(line.raw)
            if cols > 80:
                yield (line.lineno,
                       'line exceeds 80 columns (%d)' % cols)
def check_continuation_alignment(ctx):
    """Continuation lines of a multi-line property must start in the
    display column of the first '<' or '"' after the '=' on the
    property's first line.

    Display columns (tabs expanded) are compared so that tab-indented
    sources, where a continuation is aligned with tabs plus spaces, are
    handled. Properties with no '=' or no '<' / '"' after it are not
    checked.
    """
    opener = re.compile(r'[<"]')
    for dl in ctx.lines:
        if dl.linetype != LineType.PROPERTY or not dl.continuations:
            continue
        eq = dl.raw.find('=')
        if eq < 0:
            continue
        anchor = opener.search(dl.raw, eq + 1)
        if anchor is None:
            continue
        target_col = _display_col(dl.raw[:anchor.start()])
        for cont in dl.continuations:
            if _display_col(cont.indent_str) != target_col:
                yield (cont.lineno,
                       'continuation should align to column %d '
                       '(under "<" or \\")' % (target_col + 1))
def check_unclosed_block_comment(ctx):
    """Report a /* comment that is still open at the end of the block.

    Tracks the line where the pending comment began: a COMMENT_START
    line, or the first COMMENT_BODY line when the comment was opened by
    a tail /* on a code line (which is classified as code, not as
    COMMENT_START). A COMMENT_END line clears it. At most one finding
    is produced.
    """
    pending = None
    for dl in ctx.lines:
        kind = dl.linetype
        if kind == LineType.COMMENT_END:
            pending = None
        elif kind == LineType.COMMENT_START or \
                (kind == LineType.COMMENT_BODY and pending is None):
            pending = dl.lineno
    if pending is not None:
        yield (pending, 'unclosed /* block comment')
def check_unused_labels(ctx):
    """Report every label that is defined but never &-referenced.

    Findings come out in sorted label order. The line number is that of
    the first line in the raw text where the label name is followed by
    ':'; it falls back to 1 when no such line is found.
    """
    defined, referenced = collect_labels_and_refs(ctx.text)
    for label in sorted(defined - referenced):
        pattern = r'(?m)^.*\b' + re.escape(label) + r'\s*:'
        hit = re.search(pattern, ctx.text)
        if hit:
            lineno = ctx.text.count('\n', 0, hit.start()) + 1
        else:
            lineno = 1
        yield (lineno, 'label %r defined but never &-referenced' % label)
# --- registry --------------------------------------------------------------
# Rule registry. Each entry names a rule, the lowest mode that runs it,
# a one-line description, its check function and (optionally) the input
# types it applies to. select_rules() filters this list per run.
RULES = [
    # 'relaxed' is the default; rules in this group must produce zero
    # output on a clean kernel tree (post the small prep-cleanup
    # commit at the head of this series).
    Rule('trailing-whitespace', 'relaxed',
         'no trailing whitespace on any line',
         check_trailing_whitespace),
    Rule('tab-in-dts', 'relaxed',
         'YAML examples may not contain tab characters',
         check_tab_in_dts, applies_to=('yaml',)),
    Rule('mixed-indent-chars', 'relaxed',
         'indent must not mix tabs and spaces',
         check_mixed_indent_chars),
    Rule('unclosed-block-comment', 'relaxed',
         'every /* block comment must close with */',
         check_unclosed_block_comment),
    # DTS files always use tabs; this is not negotiable per kernel
    # coding style (.dts files are real source). Relaxed mode.
    Rule('indent-unit-dts', 'relaxed',
         'DTS files: 1 tab per nesting level',
         check_indent_unit_dts,
         applies_to=('dts', 'dtsi', 'dtso')),
    # 'strict' rules are opt-in (e.g. for new submissions via
    # checkpatch.pl in a follow-up series). They flag many existing
    # files and can be promoted to relaxed once those are cleaned up.
    Rule('indent-unit', 'strict',
         'YAML: 2 or 4 spaces per level',
         check_indent_unit_relaxed, applies_to=('yaml',)),
    Rule('indent-unit-strict', 'strict',
         'YAML: must be 4 spaces per level',
         check_indent_unit_strict, applies_to=('yaml',)),
    Rule('indent-consistent', 'strict',
         'every line indented at depth * unit',
         check_indent_consistent),
    Rule('blank-lines', 'strict',
         'no consecutive blanks; no blanks at node body edges',
         check_blank_lines),
    Rule('child-address-order', 'strict',
         'addressed siblings must be in ascending address order',
         check_child_address_order),
    Rule('child-name-order', 'strict',
         'unaddressed siblings must be in natural-sort name order',
         check_child_name_order),
    Rule('property-order', 'strict',
         'canonical bucket + pairing + natural-sort order of properties',
         check_property_order),
    Rule('required-blank-lines', 'strict',
         'blank line before child nodes and before "status"',
         check_required_blank_lines),
    Rule('hex-case', 'strict',
         'hex literals must be lowercase',
         check_hex_case),
    Rule('unit-address-format', 'strict',
         'unit addresses must be lowercase hex without leading zeros',
         check_unit_address_format),
    Rule('value-whitespace', 'strict',
         'no whitespace directly inside <...> brackets',
         check_value_whitespace),
    Rule('node-close-alone', 'strict',
         'closing brace must be on its own line',
         check_node_close_alone),
    Rule('line-length', 'strict',
         'lines must not exceed 80 columns',
         check_line_length),
    Rule('continuation-alignment', 'strict',
         'multi-line property continuations align under "<" or "\\""',
         check_continuation_alignment),
    Rule('unused-labels', 'strict',
         'every label must be &-referenced in the same example/file '
         '(skipped for .dtsi/.dtso since labels there are exported)',
         check_unused_labels, applies_to=('yaml', 'dts')),
]
def select_rules(mode, input_kind):
    """Return, in registry order, the rules to run for the given mode and
    input type: those whose own mode ranks no higher than the requested
    one and whose applies_to includes the input type."""
    rank = {'relaxed': 0, 'strict': 1}
    return [rule for rule in RULES
            if rank[rule.mode] <= rank[mode]
            and input_kind in rule.applies_to]
# ---------------------------------------------------------------------------
# Block runner
# ---------------------------------------------------------------------------
def check_block(text, mode, indent_kind, input_type):
    """Classify one block of DTS text and run every selected rule on it.

    Returns a list of (lineno, rule_name, message) tuples ordered by
    line number and then by rule name.
    """
    ctx = Ctx(classify_lines(text), text, mode, indent_kind)
    findings = [
        (lineno, rule.name, msg)
        for rule in select_rules(mode, input_type)
        for lineno, msg in rule.check(ctx)
    ]
    return sorted(findings, key=lambda found: found[:2])
# ---------------------------------------------------------------------------
# Input drivers (YAML examples vs raw DTS)
# ---------------------------------------------------------------------------
def _yaml_loader():
    """Return a fresh ruamel.yaml YAML() instance with default settings."""
    loader = ruamel.yaml.YAML()
    return loader
def iter_yaml_examples(filepath):
    """Yield (example_text, base_lineno_in_file, example_index) for each
    string entry under the top-level 'examples' key of a YAML file.

    A load failure is reported on stderr and yields nothing. Files
    without a usable 'examples' entry yield nothing; non-string
    examples are skipped.
    """
    loader = _yaml_loader()
    try:
        with open(filepath, encoding='utf-8') as stream:
            doc = loader.load(stream)
    except Exception as err:
        print('%s: error loading YAML: %s' % (filepath, err),
              file=sys.stderr)
        return
    if not (isinstance(doc, dict) and 'examples' in doc):
        return
    examples = doc['examples']
    if not hasattr(examples, '__iter__'):
        return
    for index, example in enumerate(examples):
        if not isinstance(example, str):
            continue
        try:
            # NOTE(review): the +2 presumably maps the list item's
            # 0-based line to the 1-based first line of the example
            # text -- confirm against ruamel's line/column data.
            first_line = examples.lc.item(index)[0] + 2
        except Exception:
            # No usable position information; number from line 1.
            first_line = 1
        yield (str(example), first_line, index)
def iter_dts_file(filepath):
    """Yield the whole file as one block: (text, 1, None).

    A read failure is reported on stderr and yields nothing.
    """
    try:
        with open(filepath, encoding='utf-8') as stream:
            contents = stream.read()
    except Exception as err:
        print('%s: error reading: %s' % (filepath, err), file=sys.stderr)
    else:
        yield (contents, 1, None)
# ---------------------------------------------------------------------------
# Top-level processing
# ---------------------------------------------------------------------------
def input_kind(filepath):
    """Map a path to its input type by case-insensitive extension:
    'yaml' (.yaml/.yml), 'dts', 'dtsi' or 'dtso'; None for anything
    else."""
    lowered = filepath.lower()
    if lowered.endswith(('.yaml', '.yml')):
        return 'yaml'
    for kind in ('dts', 'dtsi', 'dtso'):
        if lowered.endswith('.' + kind):
            return kind
    return None
# All input types that use tab indentation and follow DTS coding style.
# collect_findings() treats each of these as a single whole-file block.
DTS_FAMILY = ('dts', 'dtsi', 'dtso')
def collect_findings(filepath, mode):
    """Check one file and return (lines, count).

    'lines' holds the formatted output strings and 'count' the number
    of findings. An unrecognised file type produces a single "skipping"
    line and a count of 0.
    """
    kind = input_kind(filepath)
    if kind == 'yaml':
        indent_kind, blocks = 'spaces', iter_yaml_examples(filepath)
    elif kind in DTS_FAMILY:
        indent_kind, blocks = 'tab', iter_dts_file(filepath)
    else:
        return (['%s: unknown file type, skipping' % filepath], 0)
    report = []
    for text, base, idx in blocks:
        tag = '' if idx is None else ' example %d' % idx
        for lineno, rule, msg in check_block(text, mode, indent_kind, kind):
            # Block-relative line number -> line number in the file.
            report.append('%s:%d:%s [%s] %s' %
                          (filepath, base + lineno - 1, tag, rule, msg))
    return (report, len(report))
# Worker entry point for ProcessPoolExecutor.map(). Top-level so it is
# picklable on every platform.
def _worker(args):
    """Unpack one (filepath, mode) work item and check that file."""
    path, rule_mode = args
    return collect_findings(path, rule_mode)
def main():
    """Command-line entry point.

    Parses arguments, checks every input file (in a process pool unless
    a single job or a single file makes that pointless) and prints the
    findings to stderr. Returns 0 when there are no findings and 1
    otherwise; --list-rules prints the registry and returns 0.
    """
    import os
    ap = argparse.ArgumentParser(
        description='Check DTS coding style on YAML examples and '
                    '.dts/.dtsi/.dtso files.',
        fromfile_prefix_chars='@')
    ap.add_argument('--mode', choices=('relaxed', 'strict'),
                    default='relaxed',
                    help='which rule set to apply (default: relaxed)')
    ap.add_argument('-j', '--jobs', type=int, default=0,
                    metavar='N',
                    help='run N workers in parallel (default: respect '
                         'the make jobserver via $PARALLELISM, otherwise '
                         'os.cpu_count(); use 1 to disable multiprocessing)')
    ap.add_argument('--list-rules', action='store_true',
                    help='print all rules with their mode and exit')
    ap.add_argument('files', nargs='*', metavar='file',
                    help='YAML binding files or .dts/.dtsi/.dtso files; '
                         'use @argfile to read paths from a file')
    args = ap.parse_args()
    if args.list_rules:
        for r in RULES:
            applies = ','.join(r.applies_to)
            print('%-22s %-7s [%s] %s' %
                  (r.name, r.mode, applies, r.description))
        return 0
    if not args.files:
        ap.error('no input files')
    if args.jobs > 0:
        jobs = args.jobs
    else:
        # When invoked under scripts/jobserver-exec, $PARALLELISM
        # holds the slot count make has reserved for us; this lets
        # `make -j N dt_binding_check` constrain our worker pool to N.
        try:
            jobs = int(os.environ['PARALLELISM'])
        except (KeyError, ValueError):
            jobs = 0
        # A zero or negative count would divide by zero in the chunk
        # computation below or be rejected by ProcessPoolExecutor, so
        # treat it like an unset value.
        if jobs < 1:
            jobs = os.cpu_count() or 1
    # Single-process path: keep import surface small for tests and
    # easy debugging.
    if jobs == 1 or len(args.files) == 1:
        total = 0
        for f in args.files:
            lines, n = collect_findings(f, args.mode)
            for line in lines:
                print(line, file=sys.stderr)
            total += n
        return 1 if total else 0
    # Multi-process path. ex.map preserves input order so output is
    # deterministic across runs.
    from concurrent.futures import ProcessPoolExecutor
    total = 0
    work = [(f, args.mode) for f in args.files]
    # Roughly eight chunks per worker, and never an empty chunk.
    chunk = max(1, len(work) // (jobs * 8)) if work else 1
    with ProcessPoolExecutor(max_workers=jobs) as ex:
        for lines, n in ex.map(_worker, work, chunksize=chunk):
            for line in lines:
                print(line, file=sys.stderr)
            total += n
    return 1 if total else 0
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())