blob: c3e83574de30b3c50cd0bedab4b072f712db365c [file] [log] [blame]
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2025 - Sasha Levin <sashal@kernel.org>
#
#set -x
# Configuration
: "${CVEKERNELTREE:=$HOME/linux}"
export CVEKERNELTREE
# Function to count unique CVEs in a date range
count_cves_in_range() {
local start_date="$1"
local end_date="$2"
# Get unique CVE IDs from filenames, ignoring extensions and duplicates,
# count the .json file as that is created the first time around (a huge
# hunk of .dyad files were added all at once which could skew the results.
git log --diff-filter=A --pretty=format: --name-only --after="$start_date" --before="$end_date" cve/published/ |
grep json$ |
grep -o 'CVE-[0-9]\{4\}-[0-9]\{4,\}' |
sort -u |
wc -l
}
# Function to get commit author from sha1 file
get_commit_author() {
local sha1_file="$1"
local sha1=$(cat "$sha1_file")
if [ -n "$sha1" ]; then
# Get author name from Linux repo
git --git-dir="$CVEKERNELTREE/.git" log -1 --format="%aN" "$sha1" 2>/dev/null || echo "unknown"
fi
}
export -f get_commit_author
# Function to get subsystem from commit
get_commit_subsystem() {
local sha1_file="$1"
local sha1=$(cat "$sha1_file")
if [ -n "$sha1" ]; then
# Get the first changed file from the commit to determine subsystem and sub-subsystem
local path=$(git --git-dir="$CVEKERNELTREE/.git" show --pretty=format: --name-only "$sha1" 2>/dev/null | head -n1)
if [ -n "$path" ]; then
local main_subsystem=$(echo "$path" | cut -d'/' -f1)
local sub_subsystem=$(echo "$path" | cut -d'/' -f1,2)
echo "$main_subsystem|$sub_subsystem"
fi
fi
}
export -f get_commit_subsystem
# Function to process a single CVE for subsystem stats
process_cve_subsystem() {
local sha1_file="$1"
local tmp_dir="$2"
local sub_tmp_dir="$3"
local cve=$(basename "$sha1_file" .sha1)
local subsystem_info=$(get_commit_subsystem "$sha1_file")
if [ -n "$subsystem_info" ]; then
local main_subsystem=$(echo "$subsystem_info" | cut -d'|' -f1)
local sub_subsystem=$(echo "$subsystem_info" | cut -d'|' -f2)
# Use underscore instead of slash for filenames
local main_file="$tmp_dir/${main_subsystem}"
local sub_file="$sub_tmp_dir/${sub_subsystem//\//_}"
# Use flock to safely append to files from multiple processes
(
flock -x 200
echo "$cve" >> "$main_file"
) 200>"$main_file.lock"
if [ "$main_subsystem" != "$sub_subsystem" ]; then
(
flock -x 200
echo "$cve" >> "$sub_file"
) 200>"$sub_file.lock"
fi
fi
}
export -f process_cve_subsystem
# Function to show subsystem statistics
show_subsystem_stats() {
local num_subsystems="$1"
local num_sub_subsystems="$2"
local show_authors="$3"
print_header "Top $num_subsystems Subsystems with CVEs (showing top $num_sub_subsystems sub-subsystems each)"
# Create temporary directories for subsystem processing
local tmp_dir=$(mktemp -d)
local sub_tmp_dir=$(mktemp -d)
mkdir -p "$tmp_dir/locks"
mkdir -p "$sub_tmp_dir/locks"
trap 'rm -rf "$tmp_dir" "$sub_tmp_dir"' EXIT
# Process all sha1 files in parallel - now recursively searching through subdirectories
find cve/published -type f -name "*.sha1" -exec readlink -f {} \; | \
parallel -j$(nproc) process_cve_subsystem {} "$tmp_dir" "$sub_tmp_dir"
# Clean up lock files
rm -f "$tmp_dir"/*.lock "$sub_tmp_dir"/*.lock
# Count and sort subsystems by CVE count
for subsys in "$tmp_dir"/*; do
if [ -f "$subsys" ]; then
local name=$(basename "$subsys")
local count=$(wc -l < "$subsys")
echo "$count $name"
fi
done | sort -rn | head -n "$num_subsystems" | while read count subsystem; do
echo "$subsystem: $count CVEs"
# Find files that start with the subsystem name followed by underscore
find "$sub_tmp_dir" -type f -name "${subsystem}_*" 2>/dev/null | while read sub_file; do
local sub_name=$(basename "$sub_file" | tr '_' '/')
local sub_count=$(wc -l < "$sub_file")
echo "$sub_count $sub_name"
done | sort -rn | head -n "$num_sub_subsystems" | while read sub_count sub_name; do
echo " $sub_name: $sub_count CVEs"
done
# If authors flag is set, show top authors for this subsystem
if [ "$show_authors" = true ]; then
echo " Top authors for $subsystem:"
# Create a temporary file to store CVE IDs for this subsystem
local cve_list_file=$(mktemp)
local authors_file=$(mktemp)
cat "$tmp_dir/$subsystem" > "$cve_list_file"
# Process CVEs in parallel to get authors
cat "$cve_list_file" | parallel -j$(nproc) --keep-order \
'sha1_file=$(find cve/published -type f -name "{}.sha1" 2>/dev/null); \
if [ -n "$sha1_file" ]; then \
author=$(get_commit_author "$sha1_file"); \
if [ -n "$author" ] && [ "$author" != "unknown" ]; then \
echo "$author"; \
fi; \
fi' > "$authors_file"
# Sort and count authors
if [ -s "$authors_file" ]; then
sort "$authors_file" | uniq -c | sort -rn | head -n 5 | \
while read author_count author; do
echo " $author: $author_count CVEs"
done
else
echo " No author information available"
fi
# Clean up temporary files
rm -f "$cve_list_file" "$authors_file"
echo ""
fi
echo ""
done
}
# Function to print a section header
print_header() {
echo -e "\n=== $1 ==="
}
# Function to get kernel versions from CVE
get_kernel_versions() {
local cve_file="$1"
local type="$2" # "fixed" or "vulnerable"
local cve_base=$(basename "$cve_file" .sha1)
local dyad_file="$(dirname "$cve_file")/${cve_base}.dyad"
if [ -f "$dyad_file" ]; then
# Get the version from the dyad file, skipping comments and empty lines
# Only take lines that have a real version (not 0:0)
# Only take mainline versions (no -rc versions)
# Only take versions with exactly two numbers (e.g., 6.1, not 6.1.120)
# Extract only major.minor version (e.g., 6.1 from 6.1.120)
# For fixed versions, use field 3; for vulnerable versions, use field 1
local field=3
if [ "$type" = "vulnerable" ]; then
field=1
fi
grep -v '^#' "$dyad_file" | \
grep -v '^$' | \
grep -v ':0:0$' | \
cut -d: -f$field | \
grep -v -- '-rc' | \
grep -E '^[0-9]+\.[0-9]+$' | \
head -n1
fi
}
export -f get_kernel_versions
# Function to show version statistics
show_version_stats() {
local num_versions="$1"
print_header "Top $num_versions Major Kernel Versions with CVE Fixes"
# Create temporary directories for version processing
local fixed_dir=$(mktemp -d)
local vuln_dir=$(mktemp -d)
trap 'rm -rf "$fixed_dir" "$vuln_dir"' EXIT
# Process all sha1 files in parallel to get versions from dyad files
find cve/published -type f -name "*.sha1" | \
parallel -j$(nproc) get_kernel_versions {} fixed | \
sort | \
grep -v "^$" | \
while read version; do
echo "$version" >> "$fixed_dir/${version}"
done
find cve/published -type f -name "*.sha1" | \
parallel -j$(nproc) get_kernel_versions {} vulnerable | \
sort | \
grep -v "^$" | \
while read version; do
echo "$version" >> "$vuln_dir/${version}"
done
# Show fixed versions
echo -e "\nTop $num_versions Major Kernel Versions Where CVEs Were Fixed:"
for version_file in "$fixed_dir"/*; do
if [ -f "$version_file" ]; then
local version=$(basename "$version_file")
local count=$(wc -l < "$version_file")
if [ "$version" = "0" ]; then
echo "$count Unknown"
else
echo "$count $version"
fi
fi
done | sort -rn | head -n "$num_versions" | while read count version; do
if [ "$version" = "Unknown" ]; then
echo "$version: $count CVEs fixed"
else
printf "Linux $version: %4d CVEs fixed\n" "$count"
fi
done
# Show vulnerable versions
echo -e "\nTop $num_versions Major Kernel Versions Where CVEs Were Introduced:"
for version_file in "$vuln_dir"/*; do
if [ -f "$version_file" ]; then
local version=$(basename "$version_file")
local count=$(wc -l < "$version_file")
if [ "$version" = "0" ]; then
echo "$count Unknown"
else
echo "$count $version"
fi
fi
done | sort -rn | head -n "$num_versions" | while read count version; do
if [ "$version" = "Unknown" ]; then
echo "$version: $count CVEs introduced"
else
printf "Linux $version: %4d CVEs introduced\n" "$count"
fi
done
}
# Function to process a single CVE for TTF analysis
process_cve_ttf() {
local sha1_file="$1"
local tmp_dir="$2"
local cve_base=$(basename "$sha1_file" .sha1)
local vuln_ver=$(get_kernel_versions "$sha1_file" "vulnerable")
local fixed_ver=$(get_kernel_versions "$sha1_file" "fixed")
# Only process if we have both versions and they are two-number versions
if [ -n "$vuln_ver" ] && [ -n "$fixed_ver" ] && [ "$vuln_ver" != "0" ] && [ "$fixed_ver" != "0" ] && \
[[ "$vuln_ver" =~ ^[0-9]+\.[0-9]+$ ]] && [[ "$fixed_ver" =~ ^[0-9]+\.[0-9]+$ ]]; then
# Convert versions to release dates using Linux repo
local vuln_date=$(git --git-dir="$CVEKERNELTREE/.git" tag -l "v${vuln_ver}*" --sort=v:refname | head -n1 | xargs git --git-dir="$CVEKERNELTREE/.git" log -1 --format=%ai 2>/dev/null | cut -d' ' -f1)
local fixed_date=$(git --git-dir="$CVEKERNELTREE/.git" tag -l "v${fixed_ver}*" --sort=v:refname | head -n1 | xargs git --git-dir="$CVEKERNELTREE/.git" log -1 --format=%ai 2>/dev/null | cut -d' ' -f1)
if [ -n "$vuln_date" ] && [ -n "$fixed_date" ]; then
# Calculate days between dates
local days=$(( ($(date -d "$fixed_date" +%s) - $(date -d "$vuln_date" +%s)) / 86400 ))
# Use flock to safely append to files from multiple processes
(
flock -x 200
echo "$days" >> "$tmp_dir/ttf_days"
) 200>"$tmp_dir/ttf_days.lock"
# Categorize into time ranges
local category
if [ $days -le 30 ]; then
category="1month"
elif [ $days -le 90 ]; then
category="3months"
elif [ $days -le 180 ]; then
category="6months"
elif [ $days -le 365 ]; then
category="1year"
else
category="over1year"
fi
(
flock -x 200
echo "$cve_base" >> "$tmp_dir/$category"
) 200>"$tmp_dir/$category.lock"
fi
fi
}
export -f process_cve_ttf
# Function to analyze time to fix
show_time_to_fix_stats() {
print_header "Time to Fix Analysis"
# Create temporary directory for TTF processing
local tmp_dir=$(mktemp -d)
mkdir -p "$tmp_dir/locks"
trap 'rm -rf "$tmp_dir"' EXIT
# Process all CVEs in parallel
find cve/published -type f -name "*.sha1" | \
parallel -j$(nproc) process_cve_ttf {} "$tmp_dir"
# Clean up lock files
rm -f "$tmp_dir"/*.lock
# Calculate statistics
if [ -f "$tmp_dir/ttf_days" ]; then
local total_cves=$(wc -l < "$tmp_dir/ttf_days")
local avg_days=$(awk '{ sum += $1 } END { print sum/NR }' "$tmp_dir/ttf_days")
local median_days=$(sort -n "$tmp_dir/ttf_days" | awk -v total="$total_cves" 'BEGIN{count=0} {count++; nums[count]=$1} END{if(count%2==0) print (nums[int(count/2)]+nums[int(count/2)+1])/2; else print nums[int(count/2)+1]}')
echo "Analysis based on $total_cves CVEs with known introduction and fix dates"
echo "Average time to fix: $(printf "%.1f" $avg_days) days"
echo "Median time to fix: $(printf "%.1f" $median_days) days"
echo ""
echo "Distribution:"
for period in "1month" "3months" "6months" "1year" "over1year"; do
local count=0
if [ -f "$tmp_dir/$period" ]; then
count=$(wc -l < "$tmp_dir/$period")
fi
local percentage=$(awk -v count="$count" -v total="$total_cves" 'BEGIN { printf "%.1f", (count/total)*100 }')
case $period in
"1month") echo "≤ 30 days: $count CVEs ($percentage%)" ;;
"3months") echo "31-90 days: $count CVEs ($percentage%)" ;;
"6months") echo "91-180 days: $count CVEs ($percentage%)" ;;
"1year") echo "181-365 days: $count CVEs ($percentage%)" ;;
"over1year") echo "> 365 days: $count CVEs ($percentage%)" ;;
esac
done
else
echo "No CVEs found with both introduction and fix dates."
fi
}
# Parse command line arguments
show_authors=false
show_subsystems=false
show_versions=false
show_ttf=false
num_authors=10
num_subsystems=10
num_sub_subsystems=3
num_versions=10
for arg in "$@"; do
if [[ $arg =~ ^--authors(=([0-9]+))?$ ]]; then
show_authors=true
if [[ -n "${BASH_REMATCH[2]}" ]]; then
num_authors="${BASH_REMATCH[2]}"
fi
elif [[ $arg =~ ^--subsystem(=([0-9]+)(,[0-9]+)?)?$ ]]; then
show_subsystems=true
if [[ -n "${BASH_REMATCH[2]}" ]]; then
if [[ "${BASH_REMATCH[2]}" =~ ^([0-9]+),([0-9]+)$ ]]; then
num_subsystems="${BASH_REMATCH[1]}"
num_sub_subsystems="${BASH_REMATCH[2]}"
else
num_subsystems="${BASH_REMATCH[2]}"
fi
fi
elif [[ $arg =~ ^--version(=([0-9]+))?$ ]]; then
show_versions=true
if [[ -n "${BASH_REMATCH[2]}" ]]; then
num_versions="${BASH_REMATCH[2]}"
fi
elif [[ $arg == "--ttf" ]]; then
show_ttf=true
fi
done
# Get first CVE date for statistics
first_cve_date=$(git log --diff-filter=A --reverse --pretty=format:%ad --date=short cve/published/ | head -n 1)
if [ -z "$first_cve_date" ]; then
first_cve_date="2019-01-01" # Fallback if no git history
fi
# Show summary statistics if --summary flag is provided
if [[ " $* " =~ " --summary " ]]; then
# Get current date components
current_year=$(date +%Y)
current_month=$(date +%m)
# Calculate statistics per year
print_header "CVEs Published Per Year"
end_year=$(date +%Y)
for year in $(seq 2019 $end_year); do
count=$(count_cves_in_range "$year-01-01" "$((year+1))-01-01")
printf "$year: %4d CVEs\n" "$count"
done
# Calculate statistics for last 6 months
print_header "CVEs Published in Last 6 Months"
for i in {5..0}; do
# Calculate month and year
month=$((current_month - i))
year=$current_year
if [ $month -le 0 ]; then
month=$((month + 12))
year=$((year - 1))
fi
# Format dates for consistent 2-digit months
month_padded=$(printf "%02d" $month)
next_month=$((month + 1))
next_year=$year
if [ $next_month -gt 12 ]; then
next_month=1
next_year=$((year + 1))
fi
next_month_padded=$(printf "%02d" $next_month)
# Get count for this month
start_date="$year-$month_padded-01"
end_date="$next_year-$next_month_padded-01"
count=$(count_cves_in_range "$start_date" "$end_date")
printf "%15s: %4d CVEs\n" "$(date -d "$start_date" +"%B %Y")" "$count"
done
# Calculate overall averages
print_header "Overall Averages"
total_days=$(( ($(date +%s) - $(date -d "$first_cve_date" +%s)) / 86400 ))
total_cves=$(count_cves_in_range "$first_cve_date" "$(date +%Y-%m-%d)")
# Calculate averages
avg_per_month=$(bc <<< "scale=2; $total_cves / ($total_days / 30.44)")
avg_per_week=$(bc <<< "scale=2; $total_cves / ($total_days / 7)")
avg_per_day=$(bc <<< "scale=2; $total_cves / $total_days")
echo "Average CVEs per month: $avg_per_month"
echo "Average CVEs per week: $avg_per_week"
echo "Average CVEs per day: $avg_per_day"
fi
# Show author stats if --authors flag is provided
if [[ "$show_authors" = true ]]; then
print_header "Top $num_authors CVE Commit Authors"
# Find all sha1 files and process them in parallel
find cve/published -type f -name "*.sha1" | \
parallel -j$(nproc) get_commit_author | \
sort | \
grep -v "^$" | \
grep -v "unknown" | \
uniq -c | \
sort -rn | \
head -n "$num_authors" | \
while read count author; do
echo "$author: $count CVEs"
done
fi
# Show subsystem stats if --subsystem flag is provided
if [[ "$show_subsystems" = true ]]; then
show_subsystem_stats "$num_subsystems" "$num_sub_subsystems" "$show_authors"
fi
# Show version stats if --version flag is provided
if [[ "$show_versions" = true ]]; then
show_version_stats "$num_versions"
fi
# Show time to fix stats if --ttf flag is provided
if [[ "$show_ttf" = true ]]; then
show_time_to_fix_stats
fi
# Update help message
if [[ ! " $* " =~ " --summary " ]] && [[ "$show_authors" = false ]] && [[ "$show_subsystems" = false ]] && [[ "$show_versions" = false ]] && [[ "$show_ttf" = false ]]; then
echo "Usage: $0 [--summary] [--authors[=N]] [--subsystem[=M[,S]]] [--version[=N]] [--ttf]"
echo " --summary Show general CVE statistics"
echo " --authors[=N] Show top N CVE commit authors (default: 10)"
echo " --subsystem[=M[,S]] Show top M subsystems with S sub-subsystems each (default: M=10,S=3)"
echo " --version[=N] Show top N kernel versions with CVE fixes (default: 10)"
echo " --ttf Show Time to Fix analysis"
fi
echo -e "\nStatistics calculated from $first_cve_date to $(date +%Y-%m-%d)"