#!/usr/bin/env python3
# ,xref.py - Collect xrefs from HTML version of POSIX standard
#
# Copyright (C) 2013 Felix Janda
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import re
import sys
import fileinput
from glob import glob
from html.entities import entitydefs
# Subdirectories of the HTML tree that are scanned, in this order.
SECTIONS = ("basedefs", "functions", "utilities", "xrat")

# Removes, from an anchor line: everything up to and including the
# '<a name="tag_' prefix, the closing tag that ends the line, and the
# " Table: " / " Figure: " caption markers.
ANCHOR_NOISE_RE = re.compile(r'^.*<a name="tag_*|</.*>\n$| Table: | Figure: ')
# Matches the end of the anchor element ('">...</a>'); replaced by a single
# space so the tag name and the title become two space-separated fields.
ANCHOR_END_RE = re.compile(r'">[^>]*</a>')
# Applied to an <h2> line: drops everything up to the last "> " and
# everything from the first remaining "." onwards; what is left is used as
# the chapter number.
CHAPTER_NUMBER_RE = re.compile(r'^.*> |\..*\n$')
# Reference kinds that are looked up in the collected HTML references.
REF_KIND_RE = re.compile(r'Chapter|Section|Table|Figure')
# A named character reference such as "&amp;".
ENTITY_RE = re.compile(r'&([A-Za-z0-9]+);')


def collect_chapter_files(root):
    """Return all V*_chap*.html files below *root*.

    Files are grouped by the entries of SECTIONS (in that order) and sorted
    by name within each group.
    """
    files = []
    for section in SECTIONS:
        files.extend(sorted(glob(root + "/" + section + "/V*_chap*.html")))
    return files


def decode_entities(text):
    """Replace named HTML entities known to ``entitydefs`` in *text*.

    The text is decoded in a single pass, so the output of one replacement
    is never decoded again ("&amp;lt;" becomes "&lt;", not "<"). Unknown
    entity names are left untouched.
    """
    return ENTITY_RE.sub(
        lambda m: entitydefs.get(m.group(1), m.group(0)), text)


def parse_html_refs(lines):
    """Collect cross-reference targets from HTML *lines*.

    Only lines containing '<a name="tag' (and not "_foot_") are considered.
    Returns a list of two-element lists of the form
    ["Chapter 1", "Introduction"]: the reference label and its title.

    Raises ValueError if such an anchor appears before the first <h2> line,
    because the chapter number needed for the label is not known yet.
    """
    refs = []
    chapter = None
    table = figure = 1
    for line in lines:
        if '<a name="tag' not in line or "_foot_" in line:
            continue

        label = None
        if "<h2>" in line:
            # A new chapter starts: remember its number and restart the
            # per-chapter table and figure counters.
            kind = "Chapter"
            chapter = CHAPTER_NUMBER_RE.sub("", line, count=2)
            table = figure = 1
        else:
            if chapter is None:
                raise ValueError(
                    "anchor found before any <h2> chapter heading: "
                    + line.rstrip("\n"))
            kind = "Section"
            if "Table:" in line:
                label = "Table " + chapter + "-" + str(table)
                table += 1
            elif "Figure:" in line:
                label = "Figure " + chapter + "-" + str(figure)
                figure += 1

        # Reduce the line to "<tag name> <title>".
        text = ANCHOR_NOISE_RE.sub("", ANCHOR_END_RE.sub(" ", line), count=3)
        text = decode_entities(text)
        tag, _, title = text.partition(" ")

        if label is None:
            # Chapters and sections: the tag name is a "_"-separated list of
            # numbers. The first field is skipped in favour of the chapter
            # number taken from the heading; the rest are appended with
            # leading zeros dropped, e.g. "03_01" -> "3.1".
            numbers = [str(int(field)) for field in tag.split("_")[1:]]
            label = kind + " " + ".".join([chapter] + numbers)

        refs.append([label, title])
    return refs


def annotate_labels(label_lines, htmlrefs):
    """Yield each of *label_lines* with the matching title appended.

    Leading spaces and newlines are stripped from every line. When the text
    after the first space starts with Chapter/Section/Table/Figure and
    equals the label of an entry in *htmlrefs*, ", <title>" is appended;
    otherwise the stripped line is yielded unchanged.

    The search only moves forward: it resumes at the entry matched last, so
    references are expected in the same order as in *htmlrefs*.
    """
    start = 0
    for raw in label_lines:
        line = raw.lstrip(" ").strip("\n")
        target = line.partition(" ")[2]
        title = None
        if REF_KIND_RE.match(target):
            for index in range(start, len(htmlrefs)):
                if htmlrefs[index][0] == target:
                    title = htmlrefs[index][1]
                    start = index
                    break
        yield line if title is None else line + ", " + title


def main(argv=None):
    """Read the HTML tree named by argv[1] and annotate ",xref.1" on stdout."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: ,xref.py HTML_DIR")

    files = collect_chapter_files(argv[1])
    if not files:
        # fileinput.input() falls back to reading stdin when given an empty
        # list, which would make the script hang on a wrong directory.
        sys.exit("no V*_chap*.html files found below " + argv[1])

    with fileinput.input(files) as html_lines:
        htmlrefs = parse_html_refs(html_lines)

    # Get the label names from ,xref.1 and print the results to stdout
    with open(",xref.1") as label_lines:
        for annotated in annotate_labels(label_lines, htmlrefs):
            print(annotated)


if __name__ == "__main__":
    main()