#!/usr/bin/env python
# Given an index URL and a few project hints, this will spit out a
# shiny HTML 5 W3C compliant releases page.
# Copyright (C) 2012-2014 Luis R. Rodriguez <mcgrof@do-not-panic.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from HTMLParser import HTMLParser
import urllib
import urllib2
import ConfigParser
import re
import sys
import os
import getopt
from operator import itemgetter
from datetime import date
debug = 0
def rel_html_license_verbose():
print '-----------------------------------------------------------------------'
print 'Copyright (C) 2012-2014 Luis R. Rodriguez <mcgrof@do-not-panic.com>'
print ''
print 'This program is free software: you can redistribute it and/or modify'
print 'it under the terms of the GNU Affero General Public License as'
print 'published by the Free Software Foundation, either version 3 of the'
print 'License, or (at your option) any later version.'
print ''
print 'This program is distributed in the hope that it will be useful,'
print 'but WITHOUT ANY WARRANTY; without even the implied warranty of'
print 'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the'
print 'GNU Affero General Public License for more details.'
print ''
print 'You should have received a copy of the GNU Affero General Public License'
print 'along with this program. If not, see <http://www.gnu.org/licenses/>.'
print '-----------------------------------------------------------------------'
def rel_html_license():
return "AGPL"
def rel_html_href():
return '<a href="https://github.com/mcgrof/rel-html">rel-html</a>'
def __compute_rel_weight(rel_specs):
weight = 0
extra = 0
sublevel = 0
relmod = 0
relmod_type = 0
if (debug):
sys.stdout.write("VERSION = %s\n" % rel_specs['VERSION'])
sys.stdout.write("PATCHLEVEL = %s\n" % rel_specs['PATCHLEVEL'])
sys.stdout.write("SUBLEVEL = %s\n" % rel_specs['SUBLEVEL'])
sys.stdout.write("EXTRAVERSION = %s\n" % rel_specs['EXTRAVERSION'])
sys.stdout.write("RELMOD_UPDATE = %s\n" % rel_specs['RELMOD_UPDATE'])
sys.stdout.write("RELMOD_TYPE = %s\n" % rel_specs['RELMOD_TYPE'])
if (rel_specs['EXTRAVERSION'] != ''):
if ("." in rel_specs['EXTRAVERSION'] or
"rc" in rel_specs['EXTRAVERSION']):
rc = rel_specs['EXTRAVERSION'].lstrip("-rc")
if (rc == ""):
rc = 0
else:
rc = int(rc) - 20
extra = int(rc)
else:
extra = int(rel_specs['EXTRAVERSION']) + 10
if (rel_specs['SUBLEVEL'] != ''):
sublevel = int(rel_specs['SUBLEVEL'].lstrip(".")) * 20
else:
sublevel = 5
    if (rel_specs['RELMOD_UPDATE'] != ''):
        relmod = int(rel_specs['RELMOD_UPDATE'])
if (rel_specs['RELMOD_TYPE'] != ''):
rtype = rel_specs['RELMOD_TYPE']
if ("c" in rtype):
relmod_type = relmod_type + 6
if ("u" in rtype):
relmod_type = relmod_type + 7
if ("p" in rtype):
relmod_type = relmod_type + 8
if ("n" in rtype):
relmod_type = relmod_type + 9
if ("s" in rtype):
relmod_type = relmod_type + 10
weight = (int(rel_specs['VERSION']) << 32) + \
(int(rel_specs['PATCHLEVEL']) << 16) + \
(sublevel << 8 ) + \
(extra * 60) + (relmod * 2) + relmod_type
return weight
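# A worked example of the packing above (a sketch; the numbers simply follow
# the arithmetic in __compute_rel_weight()):
#
#   "3.8"     -> extra = 0,            sublevel = 5 (empty SUBLEVEL)
#                weight = (3 << 32) + (8 << 16) + (5 << 8)
#   "3.8-rc2" -> extra = 2 - 20 = -18, same sublevel
#                weight = (3 << 32) + (8 << 16) + (5 << 8) + (-18 * 60)
#
# so a release candidate always sorts below the final release it precedes.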
def get_rel_spec(rel):
if ("rc" in rel):
m = re.match(r"v*(?P<VERSION>\d+)\.+"
"(?P<PATCHLEVEL>\d+)[.]*"
"(?P<SUBLEVEL>\d*)"
"(?P<EXTRAVERSION>[-rc]+\w*)\-*"
"(?P<RELMOD_UPDATE>\d*)[-]*"
"(?P<RELMOD_TYPE>[usnpc]*)",
rel)
else:
m = re.match(r"v*(?P<VERSION>\d+)\.+"
"(?P<PATCHLEVEL>\d+)[.]*"
"(?P<SUBLEVEL>\d*)[.]*"
"(?P<EXTRAVERSION>\w*)\-*"
"(?P<RELMOD_UPDATE>\d*)[-]*"
"(?P<RELMOD_TYPE>[usnpc]*)",
rel)
if (not m):
return m
rel_specs = m.groupdict()
return rel_specs
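# For example, "3.8-rc2-2-s" decomposes via the "rc" branch above into
# (a sketch, easily checked against the pattern):
#
#   VERSION='3', PATCHLEVEL='8', SUBLEVEL='', EXTRAVERSION='-rc2',
#   RELMOD_UPDATE='2', RELMOD_TYPE='s'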
def compute_rel_weight(rel):
rel_specs = get_rel_spec(rel)
if (not rel_specs):
return 0
return __compute_rel_weight(rel_specs)
def __compute_rel_weight_next(rel_specs):
weight = 0
date = 0
relmod_update = 0
relmod_type = 0
if (debug):
sys.stdout.write("DATE_VERSION = %s\n" % rel_specs['DATE_VERSION'])
sys.stdout.write("RELMOD_UPDATE = %s\n" % rel_specs['RELMOD_UPDATE'])
sys.stdout.write("RELMOD_TYPE = %s\n" % rel_specs['RELMOD_TYPE'])
if (rel_specs['DATE_VERSION'] != ''):
date = rel_specs['DATE_VERSION'].lstrip("rc")
date = int(date.translate(None, "-"))
    if (rel_specs['RELMOD_UPDATE'] != ''):
        relmod_update = int(rel_specs['RELMOD_UPDATE'])
if (rel_specs['RELMOD_TYPE'] != ''):
rtype = rel_specs['RELMOD_TYPE']
if ("c" in rtype):
relmod_type = relmod_type + 6
if ("u" in rtype):
relmod_type = relmod_type + 7
if ("p" in rtype):
relmod_type = relmod_type + 8
if ("n" in rtype):
relmod_type = relmod_type + 9
if ("s" in rtype):
relmod_type = relmod_type + 10
weight = (999 << 32) + \
(int(date) << 16) + \
(int(relmod_update) << 8 ) + \
(int(relmod_type))
return weight
def compute_rel_weight_next(rel):
    # rel[4] below needs at least five characters in 'rel'
    if (len(rel) < 5):
        return 0
    if (rel[4] == "-"):
m = re.match(r"v*(?P<DATE_VERSION>\w+-*\w*-*\w*)[-]*"
"(?P<RELMOD_UPDATE>\d*)[-]*"
"(?P<RELMOD_TYPE>[usnpc]*)",
rel)
else:
m = re.match(r"v*(?P<DATE_VERSION>\w+)[-]*"
"(?P<RELMOD_UPDATE>\d*)[-]*"
"(?P<RELMOD_TYPE>[usnpc]*)",
rel)
if (not m):
return 0
rel_specs = m.groupdict()
return __compute_rel_weight_next(rel_specs)
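# Date-based "next" snapshots pack similarly; for example (a sketch):
#
#   "2013-03-07-u" -> DATE_VERSION='2013-03-07', RELMOD_UPDATE='',
#                     RELMOD_TYPE='u'
#   weight = (999 << 32) + (20130307 << 16) + (0 << 8) + 7
#
# The hardcoded 999 "version" makes every snapshot outweigh any stable
# release in sort_rels_weight().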
def sort_rels_weight(col):
return sorted(col, key=lambda k: k['weight'], reverse=True)
class index_tarball_hunter(HTMLParser):
"Goes through an index page with releases and adds tarball targets to the index parser"
def parse(self, html):
"Parse the given string 's'."
self.feed(html)
self.close()
def handle_decl(self, decl):
pass
def tarball_add_stable(self, t_new):
s_new = t_new.get('specifics')
for t_old in self.tarballs:
s_old = t_old.get('specifics')
idx = self.tarballs.index(t_old)
if (s_old['VERSION'] != s_new['VERSION']):
break
if (s_old['PATCHLEVEL'] != s_new['PATCHLEVEL']):
break
if (s_old['EXTRAVERSION'] != '' and
s_new['EXTRAVERSION'] != ''):
if (s_new['EXTRAVERSION'] > s_old['EXTRAVERSION']):
self.tarballs.remove(t_old)
self.tarballs.insert(idx, t_new)
return
if (s_new['EXTRAVERSION'] < s_old['EXTRAVERSION']):
return
if (s_old['RELMOD_UPDATE'] != '' and
s_new['RELMOD_UPDATE'] != ''):
if (s_old['RELMOD_UPDATE'] == s_new['RELMOD_UPDATE']):
if (s_new['RELMOD_TYPE'] == ''):
self.tarballs.insert(idx - 1, t_new)
return
self.tarballs.append(t_new)
return
                # compare numerically: as strings, "10" would sort below "9"
                if (int(s_new['RELMOD_UPDATE']) >
                        int(s_old['RELMOD_UPDATE'])):
self.tarballs.remove(t_old)
self.tarballs.insert(idx, t_new)
return
else:
return
self.tarballs.append(t_new)
def tarball_add_next(self, t_new):
index_parser = self.index_parser
s_new = t_new.get('specifics')
for t_old in self.tarballs:
s_old = t_old.get('specifics')
idx = self.tarballs.index(t_old)
for next_date in index_parser.next_rel_dates:
if (next_date in t_old.get('rel')):
self.tarballs.insert(idx - 1, t_new)
return
self.tarballs.append(t_new)
def is_rel_eol(self, rel_specs):
index_parser = self.index_parser
for eol in index_parser.eol:
m = re.match(r"v*(?P<VERSION>\d+)\.+"
"(?P<PATCHLEVEL>\d+)[.]*"
"(?P<SUBLEVEL>\w*)[.-]*"
"(?P<EXTRAVERSION>\w*)",
eol)
if (not m):
continue
eol_specs = m.groupdict()
if (eol_specs['VERSION'] == rel_specs['VERSION'] and
eol_specs['PATCHLEVEL'] == rel_specs['PATCHLEVEL']):
return True
return False
def get_rel_match(self, value):
index_parser = self.index_parser
rel_match = dict(m=None, rel_name="")
for rel_name in index_parser.rel_names:
m = re.match(r'' + rel_name + '-+'
"v*(?P<VERSION>\d+)\.+"
"(?P<PATCHLEVEL>\d+)\.*"
"(?P<SUBLEVEL>\w*)[.-]*"
"(?P<EXTRAVERSION>\w*)[-]*"
"(?P<RELMOD_UPDATE>\d*)[-]*"
"(?P<RELMOD_TYPE>[usnpc]*)",
value)
if (m):
rel_match['m'] = m
rel_match['rel_name'] = rel_name
return rel_match
return rel_match
def get_rel_match_next(self, value):
index_parser = self.index_parser
rel_match = dict(m=None, rel_name="")
for rel_name in index_parser.rel_names:
for next_date in index_parser.next_rel_dates:
m = re.match(r'' + rel_name + '+'
+ '\-(?P<DATE_VERSION>' + next_date + '+)'
+ '\-*(?P<EXTRAVERSION>\d*)'
+ '\-*(?P<RELMOD>\w*)',
value)
if (m):
rel_match['m'] = m
rel_match['rel_name'] = rel_name
return rel_match
return rel_match
def get_rel_name(self, value):
for rel in self.releases:
if (rel in value):
return rel
return ""
def update_latest_tarball_stable(self, value):
index_parser = self.index_parser
release = self.get_rel_name(value)
if (not release):
return
if ('tar.sign' in value):
return
if (index_parser.release_extension not in value):
return
rel_match = self.get_rel_match(value)
if (not rel_match['m']):
return
rel_specifics = rel_match['m'].groupdict()
supported = True
if (self.is_rel_eol(rel_specifics)):
supported = False
p = re.compile(index_parser.release_extension + '$')
rel_name = p.sub("", value)
base_release = ''
for r in index_parser.supported:
if (r in value):
base_release = r
break
        # strip the leading "<rel_name>-"; lstrip() strips a character set,
        # not a prefix, so slice it off instead
        ver = rel_name[len(rel_match['rel_name']) + 1:]
p = re.compile('-[usnpc]*$')
short_ver = p.sub("", ver)
tmp_changelog = 'ChangeLog-' + short_ver
tmp_changelog_signed = tmp_changelog + ".sign"
if (index_parser.ignore_changelogs):
chanlog_req = False
else:
if ("rc" in ver):
chanlog_req = False
else:
chanlog_req = True
w = compute_rel_weight(ver)
if (not w):
return
tar = dict(version=short_ver,
                   base_release=base_release,
weight=w,
rel=rel_name,
specifics=rel_specifics,
base_url=self.base_url,
base_url_validated=False,
url=self.base_url + '/' + value,
maintained=supported,
longterm=False,
next_rel=False,
tarball=value,
tarball_exists=True,
ignore_signature=index_parser.ignore_signatures,
signed_tarball=rel_name + '.tar.sign',
signed_tarball_exists=False,
changelog=tmp_changelog,
changelog_url=self.base_url + '/' + tmp_changelog,
changelog_exists=False,
changelog_required=chanlog_req,
signed_changelog=tmp_changelog_signed,
signed_changelog_exists=False,
verified=False)
self.tarball_add_stable(tar)
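    # For a hypothetical href "foo-3.8.2.tar.xz" under base URL B, the dict
    # above ends up with version='3.8.2', rel='foo-3.8.2',
    # url=B + '/foo-3.8.2.tar.xz', signed_tarball='foo-3.8.2.tar.sign' and
    # changelog='ChangeLog-3.8.2' ('foo' is illustrative, not a real project).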
def update_latest_tarball_next(self, value):
index_parser = self.index_parser
rel_match = self.get_rel_match_next(value)
if (not rel_match['m']):
return
rel_specifics = rel_match['m'].groupdict()
rel_name_next = rel_match[
'rel_name'] + '-' + rel_specifics['DATE_VERSION']
next_version = rel_specifics['DATE_VERSION']
if (rel_specifics['EXTRAVERSION'] != ''):
rel_name_next = rel_name_next + '-' + rel_specifics['EXTRAVERSION']
next_version = next_version + '-' + rel_specifics['EXTRAVERSION']
if (rel_specifics['RELMOD'] != ''):
rel_name_next = rel_name_next + '-' + rel_specifics['RELMOD']
next_version = next_version + '-' + rel_specifics['RELMOD']
tar_next = rel_name_next + index_parser.release_extension
s_tarball_next = rel_name_next + ".tar.sign"
tmp_changelog = 'ChangeLog-' + next_version
tmp_changelog_signed = 'ChangeLog-' + next_version + ".sign"
w = compute_rel_weight_next(next_version)
tar_next = dict(version=next_version,
weight=w,
rel=rel_name_next,
url=self.base_url + '/' + tar_next,
specifics=rel_specifics,
base_url=self.base_url,
base_url_validated=False,
maintained=True,
longterm=False,
next_rel=True,
tarball=tar_next,
tarball_exists=True,
ignore_signature=index_parser.ignore_signatures,
signed_tarball=s_tarball_next,
signed_tarball_exists=False,
changelog=tmp_changelog,
changelog_url=self.base_url + '/' + tmp_changelog,
changelog_exists=False,
changelog_required=False,
signed_changelog=tmp_changelog_signed,
signed_changelog_exists=False,
verified=False)
self.tarball_add_next(tar_next)
def print_tarballs(self):
for tar in self.tarballs:
specifics = tar.get('specifics')
sys.stdout.write("Tarball: %s<br>----extra: %s mod_update: %s mod_type: %s<br>" %
(tar.get('url'),
specifics['EXTRAVERSION'],
specifics['RELMOD_UPDATE'],
specifics['RELMOD_TYPE']))
def is_biggest_tarball(self, pivot_tar, tars):
other_greater = 0
for tar in tars:
if pivot_tar.get('rel') == tar.get('rel'):
continue
if pivot_tar.get('base_release') == tar.get('base_release'):
if pivot_tar.get('weight') < tar.get('weight'):
other_greater = other_greater + 1
if (other_greater == 0):
return True
return False
def update_rel_candidates(self):
index_parser = self.index_parser
for tar in self.tarballs:
index_parser.rel_html_rels.append(tar)
        # iterate over a copy, since losing entries are removed from the
        # live list as we go
        for tar in list(index_parser.rel_html_rels):
            if not self.is_biggest_tarball(tar, index_parser.rel_html_rels):
                index_parser.rel_html_rels.remove(tar)
def is_next_rel(self, value):
index_parser = self.index_parser
for next_date in index_parser.next_rel_dates:
if (next_date != '' and
next_date in value and
index_parser.release_extension in value):
return True
return False
    def handle_starttag(self, tag, attributes):
        "Process a tag and its 'attributes'."
        if tag != 'a':
            return
        for name, value in attributes:
            if name != 'href':
                continue
            release = self.get_rel_name(value)
            if (not release):
                continue
            if (self.is_next_rel(value)):
                self.update_latest_tarball_next(value)
                continue
            self.update_latest_tarball_stable(value)
def handle_endtag(self, tag):
pass
def handle_data(self, data):
pass
def handle_comment(self, data):
pass
def __init__(self, index_parser, releases, url):
HTMLParser.__init__(self)
self.index_parser = index_parser
self.base_url = url.rstrip("/")
self.releases = releases
self.tarballs = []
class index_rel_inferrer(HTMLParser):
"Goes through an index page with releases and update the inferred release"
def parse(self, html):
"Parse the given string 's'."
self.feed(html)
self.close()
def handle_decl(self, decl):
pass
def revise_inference(self, rel, value, rel_name):
index_parser = self.index_parser
        # strip the leading "<rel_name>-"; lstrip() strips a character set,
        # not a prefix
        if (value.startswith(rel_name + "-")):
            value = value[len(rel_name) + 1:]
p = re.compile(index_parser.release_extension + '$')
value = p.sub("", value)
base_specs = get_rel_spec(rel.get('base'))
if (not base_specs):
return
inferred_specs = get_rel_spec(value)
if (not inferred_specs):
return
if (inferred_specs['VERSION'] != base_specs['VERSION']):
return
if (inferred_specs['PATCHLEVEL'] != base_specs['PATCHLEVEL']):
return
if (base_specs['SUBLEVEL'] != ''):
if (inferred_specs['SUBLEVEL'] != base_specs['SUBLEVEL']):
return
w = compute_rel_weight(value)
if (not w):
return
inferred_rel = dict(base=rel,
url=self.base_url,
highest_release=value,
weight=w)
# XXX: better way to do this?
if (rel.get('highest_release') == ''):
rel['url'] = self.base_url
rel['highest_release'] = value
rel['weight'] = w
if (rel.get('weight') < inferred_rel.get('weight')):
rel['url'] = self.base_url
rel['highest_release'] = value
rel['weight'] = w
    def handle_starttag(self, tag, attributes):
        "Process a tag and its 'attributes'."
        index_parser = self.index_parser
        if tag != 'a':
            return
        for name, value in attributes:
            if name != 'href':
                continue
rel_name = index_parser.search_rel_name(value)
if (not rel_name):
return
if (index_parser.release_extension not in value):
return
if (".sign" in value):
return
for rel in index_parser.inferred_releases:
if (rel.get('base') not in value):
continue
self.revise_inference(rel, value, rel_name)
def handle_endtag(self, tag):
pass
def handle_data(self, data):
pass
def handle_comment(self, data):
pass
def __init__(self, index_parser, url):
HTMLParser.__init__(self)
self.index_parser = index_parser
self.base_url = url
class stable_url_parser(HTMLParser):
"Goes through an index page and returns a URL for a release"
def parse(self, html):
"Parse the given string 's'."
self.feed(html)
self.close()
def update_url(self, rel_target, rel_target_string, r, r_string):
rel_string = r_string.lstrip("/v")
rel_string = rel_string.rstrip("/")
w = compute_rel_weight(rel_string)
rel = dict(release_base=rel_target_string,
release=rel_string,
weight=w,
version=r['VERSION'],
patchlevel=r['PATCHLEVEL'],
extraversion=r['EXTRAVERSION'],
sublevel=r['SUBLEVEL'],
relmod=r['RELMOD_UPDATE'],
rel_url=self.base_url.rstrip("/") + '/' + r_string.rstrip("/"))
if (len(self.stable_urls) == 0):
self.stable_urls.append(rel)
return
for r_tmp in self.stable_urls:
if (r_tmp.get('release_base') != rel_target_string):
continue
if (r_tmp.get('release') == r_string):
return
if (r_tmp.get('weight') < w):
self.stable_urls.remove(r_tmp)
self.stable_urls.append(rel)
return
self.stable_urls.append(rel)
def update_stable_urls(self, rel, value):
rel_target = get_rel_spec(rel)
if (not rel_target):
return
r = get_rel_spec(value)
if (not r):
return
if (rel_target['VERSION'] == ''):
return
if (rel_target['VERSION'] != r['VERSION']):
return
if (rel_target['PATCHLEVEL'] == ''):
return
if (rel_target['PATCHLEVEL'] != r['PATCHLEVEL']):
return
self.update_url(rel_target, rel, r, value)
def handle_decl(self, decl):
pass
    def handle_starttag(self, tag, attributes):
        "Process a tag and its 'attributes'."
        if tag != 'a':
            return
        for name, value in attributes:
            if name != 'href':
                continue
for rel in self.index_parser.supported:
if (rel in value):
self.update_stable_urls(rel, value)
def handle_endtag(self, tag):
pass
def handle_data(self, data):
pass
def handle_comment(self, data):
pass
def __init__(self, index_parser, url):
HTMLParser.__init__(self)
self.index_parser = index_parser
self.base_url = url
self.stable_urls = []
class index_parser(HTMLParser):
"HTML index parser for software releases class."
def parse(self, html, url):
"Parse the given string 's'."
self.feed(html)
self.close()
def __init__(self, config_file):
HTMLParser.__init__(self)
self.config = ConfigParser.SafeConfigParser()
self.config.read(config_file)
self.rel_html_proj = self.config.get("project", "rel_html_proj")
if (self.config.has_option("project", "rel_html_proj_aliases")):
self.rel_html_proj_aliases = self.config.get(
"project", "rel_html_proj_aliases").split()
else:
self.rel_html_proj_aliases = list()
self.rel_names = self.rel_html_proj_aliases
self.rel_names.insert(0, self.rel_html_proj)
self.inferred_releases = []
if (self.config.has_option("project", "supported")):
self.supported = self.config.get("project", "supported").split()
for rel in self.supported:
inferred_rel = dict(base=rel,
url='',
highest_release='',
weight=0)
self.inferred_releases.append(inferred_rel)
else:
self.supported = list()
if (self.config.has_option("project", "eol")):
self.eol = self.config.get("project", "eol").split()
for rel in self.eol:
inferred_rel = dict(base=rel, url='',
highest_release='',
weight=0)
self.inferred_releases.append(inferred_rel)
else:
self.eol = list()
self.stable_urls = []
self.rel_html_release_urls = []
urls = self.config.get("project", "rel_html_url_releases").split()
for url in urls:
self.rel_html_release_urls.append(url.strip())
self.rel_html_rels = []
if (self.config.has_option("project", "ignore_signatures")):
self.ignore_signatures = self.config.get(
"project", "ignore_signatures")
else:
self.ignore_signatures = False
if (self.config.has_option("project", "ignore_changelogs")):
self.ignore_changelogs = self.config.get(
"project", "ignore_changelogs")
else:
self.ignore_changelogs = False
if (self.config.has_option("project", "release_extension")):
self.release_extension = "." + \
self.config.get("project", "release_extension")
else:
self.release_extension = ".tar.xz"
self.next_rel_day = 0
self.next_rel_month = 0
self.next_rel_url = ''
self.next_rel_date = ''
self.next_rel_date_rfc3339 = ''
self.next_rel_dates = list()
self.rel_license = self.config.get("project", "rel_license")
self.html_title = self.config.get("html", "title")
self.html_nav_dict = ({'url': self.config.get("html", "nav_01_url"),
'txt': self.config.get("html", "nav_01_txt")},
{'url': self.config.get("html", "nav_02_url"),
'txt': self.config.get("html", "nav_02_txt")},
{'url': self.config.get("html", "nav_03_url"),
'txt': self.config.get("html", "nav_03_txt")})
self.html_release_title = self.config.get("html", "release_title")
if (self.config.has_option("html", "release_title_next")):
self.html_release_title_next = self.config.get(
"html", "release_title_next")
else:
self.html_release_title_next = ''
self.html_about_title = self.config.get("html", "about_title")
self.html_about = self.config.get("html", "about")
self.rels = []
self.signed = False
self.changelog = ''
self.signed_changelog = False
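    # A minimal rel-html.cfg sketch (values are illustrative; the optional
    # keys mirror the has_option() checks above):
    #
    #   [project]
    #   rel_html_proj = foo
    #   supported = 3.8 3.7
    #   eol = 3.6
    #   rel_html_url_releases = http://example.com/releases/
    #   rel_license = GPLv2
    #
    #   [html]
    #   title = foo releases
    #   nav_01_url = index.html
    #   nav_01_txt = Home
    #   nav_02_url = about.html
    #   nav_02_txt = About
    #   nav_03_url = license.html
    #   nav_03_txt = License
    #   release_title = Releases
    #   about_title = About foo
    #   about = An example project description.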
def get_stable_ext_urls(self, url):
url_parser = stable_url_parser(self, url)
try:
rel = urllib2.urlopen(url)
html = rel.read()
url_parser.parse(html)
self.stable_urls = url_parser.stable_urls
except urllib2.HTTPError as error:
return
def search_rel_name(self, value):
for rel_name in self.rel_names:
if (rel_name in value):
return rel_name
return ""
def search_stable_tarballs(self, ver, url):
try:
tarball_hunter = index_tarball_hunter(self, [ver], url)
f = urllib2.urlopen(url)
html = f.read()
tarball_hunter.parse(html)
tarball_hunter.update_rel_candidates()
except urllib2.HTTPError as error:
return
except urllib2.URLError as e:
return
def evaluate_stable_ext_urls(self):
for r in self.stable_urls:
self.search_stable_tarballs(r.get('release'), r.get('rel_url'))
def update_inferred_releases(self, url):
try:
rel_inferrer = index_rel_inferrer(self, url)
f = urllib2.urlopen(url)
html = f.read()
rel_inferrer.parse(html)
except urllib2.HTTPError as error:
return
except urllib2.URLError as e:
return
def evaluate_inferred_releases(self):
for r in self.inferred_releases:
if (r.get('url') == ''):
continue
self.search_stable_tarballs(r.get('highest_release'), r.get('url'))
def __get_next_rel_page(self, url):
r = urllib2.urlopen(url)
html = r.read()
num_parser = largest_num_href_parser()
return num_parser.parse(html)
def get_next_url(self, url):
self.next_rel_month = self.__get_next_rel_page(url)
self.next_rel_day = self.__get_next_rel_page(
url + self.next_rel_month)
self.next_rel_url = url + \
self.next_rel_month + '/' + self.next_rel_day
self.next_rel_date_rfc3339 = str(date.today().year) + '-' + \
self.next_rel_month + '-' + self.next_rel_day
self.next_rel_date = self.next_rel_date_rfc3339.replace("-", "")
self.next_rel_dates.append(self.next_rel_date_rfc3339)
self.next_rel_dates.append(self.next_rel_date)
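    # For example, starting from a hypothetical url "http://example.com/2013/"
    # whose largest month index is "03" and largest day under it is "07",
    # this leaves next_rel_url = "http://example.com/2013/03/07",
    # next_rel_date_rfc3339 = "2013-03-07" and next_rel_date = "20130307".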
def evaluate_next_url(self):
try:
tarball_hunter = index_tarball_hunter(self,
self.next_rel_dates,
self.next_rel_url)
f = urllib2.urlopen(self.next_rel_url)
html = f.read()
tarball_hunter.parse(html)
tarball_hunter.update_rel_candidates()
except urllib2.HTTPError as error:
return
def scrape_for_releases(self):
for url in self.rel_html_release_urls:
if url.endswith('stable/'):
self.get_stable_ext_urls(url)
self.evaluate_stable_ext_urls()
elif url.endswith(str(date.today().year) + '/'):
self.get_next_url(url)
self.evaluate_next_url()
else:
self.update_inferred_releases(url)
self.evaluate_inferred_releases()
def review_base_url(self, ver, url):
try:
f_rel = urllib2.urlopen(url)
html = f_rel.read()
self.parse(html, url)
except urllib2.HTTPError as error:
return
def validate_releases(self):
for r in self.rel_html_rels:
            if (r.get('base_url_validated')):
                continue
            if (r.get('base_url') == ''):
                continue
            self.review_base_url(r.get('version'), r.get('base_url'))
            r['base_url_validated'] = True
self.rel_html_rels = sort_rels_weight(self.rel_html_rels)
    def handle_starttag(self, tag, attributes):
        "Process a tag and its 'attributes'."
        if tag != 'a':
            return
        for name, value in attributes:
            if name != 'href':
                continue
for r in self.rel_html_rels:
# sys.stdout.write('%s<br>\n' % value)
if r.get('version') not in value:
continue
if r.get('signed_tarball') in value:
r['signed_tarball_exists'] = True
elif r.get('tarball') in value:
if "longerm" in value:
r['longterm'] = True
elif (r.get('changelog') == value):
r['changelog_exists'] = True
elif (r.get('signed_changelog') == value):
r['signed_changelog_exists'] = True
def handle_endtag(self, tag):
pass
def handle_data(self, data):
pass
def releases_verified(self):
"Verify releases"
all_verified = True
for r in self.rel_html_rels:
if (not r['tarball_exists']):
all_verified = False
sys.stdout.write('No tarball: %s<br>\n' % r['tarball'])
break
if (not r['ignore_signature']):
if (not r['signed_tarball_exists']):
all_verified = False
sys.stdout.write(
'No signed tarball: %s<br>\n' % r['signed_tarball'])
break
if (r['changelog_required']):
if (not (r['changelog_exists'])):
all_verified = False
sys.stdout.write(
'No changelog (%s): %s<br>\n' % (r['changelog'], r['version']))
break
if (not (r['signed_changelog_exists'])):
sys.stdout.write('No signed changelog (%s): %s<br>\n' %
(r['signed_changelog'], r['version']))
all_verified = False
break
else:
if (r['changelog_exists'] and
(not (r['signed_changelog_exists']))):
sys.stdout.write("Although a ChangeLog is not "
"required for this release (%s), one does "
"but it is not digitally signed. The "
"file %s does not exist<br>" %
(r['version'], r['signed_changelog']))
all_verified = False
r['verified'] = True
return all_verified
class largest_num_href_parser(HTMLParser):
"Will take an index page and return the highest numbered link"
def parse(self, html):
"Parse the given string 's'."
self.feed(html)
self.close()
return self.number
def handle_decl(self, decl):
pass
    def handle_starttag(self, tag, attributes):
        "Process a tag and its 'attributes'."
        if tag != 'a':
            return
        for name, value in attributes:
            if name != 'href':
                continue
            if (re.match(r'\d+', value)):
                number = re.sub(r'\D', "", value)
                # compare numerically: as strings, "9" would beat "10"
                if (int(number) > int(self.number)):
                    self.number = number
def handle_endtag(self, tag):
pass
def handle_data(self, data):
pass
def handle_comment(self, data):
pass
def __init__(self):
HTMLParser.__init__(self)
self.number = 0
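# A usage sketch: fed an index page whose links are numbered directories,
# parse() returns the largest one as a string, e.g.:
#
#   p = largest_num_href_parser()
#   biggest = p.parse('<a href="05/">05/</a><a href="12/">12/</a>')  # "12"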
# Example full HTML output parser: it simply reads a full HTML file and
# writes it back out. You can modify this class to help you modify the
# contents; rel_html_gen below does exactly that.
class html_base(HTMLParser):
"HTML 5 generator from parsed index parser content."
def parse(self, html):
"Parse the given string 's'."
self.feed(html)
self.close()
def handle_decl(self, decl):
sys.stdout.write('<!%s>' % decl)
def handle_starttag(self, tag, attributes):
sys.stdout.write('<%s' % tag)
for name, value in attributes:
sys.stdout.write(' %s="%s"' % (name, value))
sys.stdout.write('>')
def handle_endtag(self, tag):
sys.stdout.write('</%s>' % tag)
def handle_data(self, data):
sys.stdout.write('%s' % data)
def handle_comment(self, data):
sys.stdout.write('<!--%s-->' % data)
def __init__(self):
HTMLParser.__init__(self)
def license_url(license):
if (license == 'GPLv2'):
return "http://www.gnu.org/licenses/gpl-2.0.html"
elif (license == 'ISC'):
return "http://opensource.org/licenses/ISC"
elif (license == 'AGPL'):
return "http://www.gnu.org/licenses/agpl-3.0.html"
elif (license == 'copyleft-next'):
return "http://copyleft-next.org/"
else:
return "http://opensource.org/licenses/alphabetical"
class rel_html_gen(HTMLParser):
"HTML 5 generator from parsed index parser content."
def __init__(self, parser):
HTMLParser.__init__(self)
self.parser = parser
self.skip_endtag = False
self.latest_stable = {}
self.next_rels = []
self.next_rel_count = 0
def handle_title(self, tag, attributes):
sys.stdout.write('<%s>%s' % (tag, self.parser.html_title))
def handle_def_start(self, tag, attributes):
sys.stdout.write('<%s' % tag)
for name, value in attributes:
sys.stdout.write(' %s="%s"' % (name, value))
sys.stdout.write('>')
def handle_h1_top(self, tag, attributes):
self.skip_endtag = True
sys.stdout.write('%s</h1>\n' % (self.parser.html_title))
sys.stdout.write('\t\t<nav>\n')
sys.stdout.write('\t\t\t<ul>\n')
for nav in self.parser.html_nav_dict:
sys.stdout.write(
'\t\t\t\t<li><a href="%s">%s</a></li>\n' % (nav['url'], nav['txt']))
sys.stdout.write('\t\t\t</ul>\n')
sys.stdout.write('\t\t</nav>\n')
def handle_h1_release(self, tag, attributes):
self.skip_endtag = True
sys.stdout.write('%s</h1>\n' % (self.parser.html_release_title))
sys.stdout.write('\t\t\t<table border="0">\n')
count = 0
for r in self.parser.rel_html_rels:
count += 1
if (not r.get('verified')):
continue
            if (count == 2):
                self.latest_stable = r
if (r.get('next_rel')):
self.next_rels.append(r)
self.next_rel_count = self.next_rel_count + 1
continue
sys.stdout.write('\t\t\t\t<tr>')
sys.stdout.write('\t\t\t\t<td><a href="%s">%s</a></td>\n' %
(r.get('url'), r.get('rel')))
if (not r.get('ignore_signature')):
sys.stdout.write('\t\t\t\t<td><a href="%s">signed</a></td>\n' %
(r.get('base_url') + '/' + r.get('signed_tarball')))
else:
sys.stdout.write('\t\t\t\t<td></td>\n')
if (r.get('maintained')):
sys.stdout.write('\t\t\t\t<td></td>\n')
else:
sys.stdout.write(
'\t\t\t\t<td><font color="FF0000">EOL</font></td>\n')
if (not r.get('longterm')):
sys.stdout.write('\t\t\t\t<td></td>\n')
else:
sys.stdout.write(
'\t\t\t\t<td><font color="00FF00">Longterm</font></td>\n')
if (r.get('changelog_required')):
sys.stdout.write('\t\t\t\t<td><a href="%s">%s</a></td>\n' %
(r.get('changelog_url'), "ChangeLog"))
else:
sys.stdout.write('\t\t\t\t<td></td>\n')
sys.stdout.write('\t\t\t\t</tr>')
sys.stdout.write('\t\t\t</table>\n')
    def handle_h1_release_next(self, tag, attributes):
        if (not self.next_rels):
            return
        self.skip_endtag = True
        sys.stdout.write('%s</h1>\n' % (self.parser.html_release_title_next))
        sys.stdout.write('\t\t\t<table border="0">\n')
for r in self.next_rels:
if (not r.get('verified')):
continue
sys.stdout.write('\t\t\t\t<tr>')
sys.stdout.write('\t\t\t\t<td><a href="%s">%s</a></td>\n' %
(r.get('url'), r.get('rel')))
if (not r.get('ignore_signature')):
                sys.stdout.write('\t\t\t\t<td><a href="%s">signed</a></td>\n' %
                                 (r.get('base_url') + '/' + r.get('signed_tarball')))
else:
sys.stdout.write('\t\t\t\t<td></td>\n')
if (r.get('maintained')):
sys.stdout.write('\t\t\t\t<td></td>\n')
else:
sys.stdout.write(
'\t\t\t\t<td><font color="FF0000">EOL</font></td>\n')
if (not r.get('longterm')):
sys.stdout.write('\t\t\t\t<td></td>\n')
else:
sys.stdout.write(
'\t\t\t\t<td><font color="00FF00">Longterm</font></td>\n')
if (r.get('changelog_required')):
sys.stdout.write('\t\t\t\t<td><a href="%s">%s</a></td>\n' %
(r.get('changelog_url'), "ChangeLog"))
else:
sys.stdout.write('\t\t\t\t<td></td>\n')
sys.stdout.write('\t\t\t\t</tr>')
sys.stdout.write('\t\t\t</table>\n')
def handle_h1_about(self, tag, attributes):
self.skip_endtag = True
sys.stdout.write('%s</h1>\n' % (self.parser.html_about_title))
sys.stdout.write('<p>%s</p>\n' % (self.parser.html_about))
def handle_h_license(self, tag, attributes):
self.skip_endtag = True
sys.stdout.write('License</h1>\n')
sys.stdout.write('\t\t<p>%s is licensed under the <a href="%s">%s</a>. \n' %
(self.parser.rel_html_proj,
license_url(self.parser.rel_license),
self.parser.rel_license))
sys.stdout.write('This page was generated by %s licensed under the <a href="%s">%s</a></p>\n' %
(rel_html_href(),
license_url(rel_html_license()),
rel_html_license()))
def handle_h1_pass(self, tag, attributes):
pass
def handle_h(self, tag, attributes):
def_run = self.handle_h1_pass
for name, value in attributes:
if (name == 'id'):
if (value == 'top_content'):
def_run = self.handle_h1_top
elif (value == 'release_title'):
def_run = self.handle_h1_release
elif (value == 'release_title_next'):
def_run = self.handle_h1_release_next
elif (value == 'about'):
def_run = self.handle_h1_about
elif (value == 'license'):
def_run = self.handle_h_license
sys.stdout.write('<%s' % tag)
for name, value in attributes:
sys.stdout.write(' %s="%s"' % (name, value))
sys.stdout.write('>')
def_run(tag, attributes)
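    # The dispatch above keys off heading ids in html/template.html; a
    # minimal template sketch (layout assumed here, only the ids are taken
    # from handle_h):
    #
    #   <!DOCTYPE html>
    #   <html><head><title>placeholder</title></head><body>
    #   <h1 id="top_content">placeholder</h1>
    #   <h1 id="release_title">placeholder</h1>
    #   <h1 id="release_title_next">placeholder</h1>
    #   <h1 id="about">placeholder</h1>
    #   <h1 id="license">placeholder</h1>
    #   </body></html>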
def parse(self, html):
"Parse the given string 's'."
self.feed(html)
self.close()
def handle_decl(self, decl):
sys.stdout.write('<!%s>' % decl)
def handle_starttag(self, tag, attributes):
self.skip_endtag = False
if (tag == 'title'):
self.handle_title(tag, attributes)
elif (tag in {'h1', 'h2', 'h3'}):
self.handle_h(tag, attributes)
else:
self.handle_def_start(tag, attributes)
    def handle_endtag(self, tag):
        if (self.skip_endtag):
            # the handle_h1_* helpers already wrote their own closing tag
            return
        sys.stdout.write('</%s>' % tag)
def handle_data(self, data):
sys.stdout.write('%s' % data)
def handle_comment(self, data):
sys.stdout.write('<!--%s-->' % data)
def check_file(file_input):
if not os.path.isfile(file_input):
print 'File not found: %(file)s' % {"file": file_input}
usage()
def try_rel_next(rel):
sys.stdout.write("----------------------------------------\n")
sys.stdout.write("Rel: %s\n" % rel)
w = compute_rel_weight_next(rel)
sys.stdout.write("Weight: %s\n" % w)
def try_rel(rel_dict):
sys.stdout.write("----------------------------------------\n")
sys.stdout.write("Rel: %s\n" % rel_dict.get('version'))
rel_dict["weight"] = compute_rel_weight(rel_dict.get('version'))
sys.stdout.write("Weight: %s\n" % rel_dict.get("weight"))
def print_rels_weight(rels):
for r in rels:
sys.stdout.write("Rel: %20s\t%20s\n" %
(r.get('version'), r.get('weight')))
def try_rels(rels):
    col = []
for rel in rels:
rel_d = dict(version=rel, weight=0)
col.append(rel_d)
for r in col:
try_rel(r)
col = sort_rels_weight(col)
print_rels_weight(col)
def debug_rel_tests():
try_rel_next("20130510-2-u")
try_rel_next("2013-01-10-2-u")
try_rel_next("20130110-2-u")
try_rel_next("2013-03-07-u")
try_rel_next("2013-03-07")
rels = ["2.6.32.3",
"3.8",
"2.6.32.1",
"2.6.32.40",
"2.6.32",
"3.8.2",
"3.5.1",
"3.2.1",
"3.7.1",
"3.8.2-1-usnpc",
"3.8-rc1",
"3.8-rc1-1-usnpc",
"3.8-rc2-2-usnpc",
"3.8-rc2-2-c",
"3.8-rc2-2-s",
"3.8-rc2-2",
"3.8-rc3-1-u"]
try_rels(rels)
def main():
if (debug):
debug_rel_tests()
sys.exit(1)
config_file = ''
try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:", ["help"])
except getopt.GetoptError as err:
print str(err)
usage()
for o, a in opts:
if o in ("-f"):
check_file(a)
config_file = a
elif o in ("-h", "--help"):
usage()
if len(config_file) == 0:
config_file = 'rel-html.cfg'
parser = index_parser(config_file)
html = ""
# We go through two passes on the provided URLs:
#
# 1. Scraping for releases
# 2. Validation
#
# The first pass is based on finding the
# highest release extraversion tarball. We
# require a second pass as validation entails
# searching for a ChangeLog and signature file
# for all known existing releases.
parser.scrape_for_releases()
parser.validate_releases()
if (not parser.releases_verified()):
sys.stdout.write("Releases not verified\n")
sys.exit(1)
gen = rel_html_gen(parser)
    with open('html/template.html', 'r') as f:
        html = f.read()
    gen.parse(html)
def usage():
print ''
print '%(cmd)s' % {"cmd": sys.argv[0]}
print ''
    print 'Given an index URL and a few project hints, this will spit out'
    print 'a shiny HTML 5 W3C compliant releases page.'
print ''
rel_html_license_verbose()
print ''
print 'This program can be run without arguments or with a project file passed'
print 'as an argument. If no arguments are given it will assume you have the'
    print 'file rel-html.cfg present in your current directory.'
print ''
print 'Usage: %(cmd)s [ -f rel-html.cfg ]' % {"cmd": sys.argv[0]}
sys.exit(2)
if __name__ == "__main__":
main()