| #!/usr/bin/env python3 |
| # ,xref.py - Collect xrefs from HTML version of POSIX standard |
| # |
| # Copyright (C) 2013 Felix Janda |
| # |
| # Permission is hereby granted, free of charge, to any person |
| # obtaining a copy of this software and associated documentation files |
| # (the "Software"), to deal in the Software without restriction, |
| # including without limitation the rights to use, copy, modify, merge, |
| # publish, distribute, sublicense, and/or sell copies of the Software, |
| # and to permit persons to whom the Software is furnished to do so, |
| # subject to the following conditions: |
| # |
| # The above copyright notice and this permission notice shall be |
| # included in all copies or substantial portions of the Software. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| # SOFTWARE. |
| |
| import re |
| import sys |
| import fileinput |
| from glob import glob |
| from html.entities import entitydefs |
| |
# Sub-directories of the HTML tree that are scanned, in this order.
sections = [["basedefs"], ["functions"], ["utilities"], ["xrat"]]

# Removes everything up to and including the `<a name="tag` prefix of the
# anchor (plus any underscores directly after it), a trailing closing tag
# at the end of the line, and the " Table: " / " Figure: " caption markers.
re1 = re.compile(r'^.*<a name="tag_*|</.*>\n$| Table: | Figure: ')
# Matches from the closing quote of the anchor name through `</a>`; it is
# replaced by a space so that anchor name and title become space-separated.
re2 = re.compile(r'">[^>]*</a>')
# Removes everything up to the last "> " and everything from the next "."
# to the end of the line; what is left of an <h2> line is used as the
# chapter number.
re3 = re.compile(r'^.*> |\..*\n$')
# A label line is only looked up if its reference starts with one of these.
re4 = re.compile(r'Chapter|Section|Table|Figure')
# A named HTML entity such as "&amp;".
entity_re = re.compile(r'&(\w+);')


def find_chapter_files(root):
    """Return all V*_chap*.html files below *root*.

    The sub-directories listed in ``sections`` are visited in order and the
    files of each sub-directory are sorted by name.
    """
    files = []  # all V*_chap*.html files
    for section in sections:
        files.extend(sorted(glob(root + "/" + section[0] + "/V*_chap*.html")))
    return files


def decode_entities(text):
    """Replace the named HTML entities known to ``entitydefs`` in *text*.

    The text is decoded in a single pass, so the result of one replacement
    is never decoded again ("&amp;lt;" becomes "&lt;", not "<").  Unknown
    entities are left untouched.
    """
    return entity_re.sub(
        lambda m: entitydefs.get(m.group(1), m.group(0)), text)


def collect_htmlrefs(lines):
    """Collect the cross-reference targets found in *lines*.

    *lines* is an iterable of HTML lines (each including its newline).
    Returns a list of two-element lists of the form
    ["Chapter 1", "Introduction"]: the reference label and the title.

    Only lines containing an `<a name="tag` anchor are considered, and
    anchors with "_foot_" in the line are ignored.  The chapter number and
    the table/figure counters carry over from line to line, so the lines of
    all files have to be passed in as one stream.
    """
    htmlrefs = []
    chapter = None  # number of the current chapter, set by an <h2> line
    table = 1       # number of the next table within the chapter
    figure = 1      # number of the next figure within the chapter

    for line in lines:
        if '<a name="tag' not in line or "_foot_" in line:
            continue

        label = None  # stays None for chapters and sections
        prefix = "Section"
        if "<h2>" in line:
            prefix = "Chapter"
            chapter = re3.sub("", line, count=2)
            table = 1
            figure = 1
        elif chapter is None:
            # An anchor in front of the first chapter heading cannot be
            # numbered; skip it instead of failing on the missing chapter.
            continue
        elif "Table:" in line:
            label = "Table " + chapter + "-" + str(table)
            table += 1
        elif "Figure:" in line:
            label = "Figure " + chapter + "-" + str(figure)
            figure += 1

        # Reduce the line to "<rest of anchor name> <title>".
        line = re1.sub("", re2.sub(" ", line), count=3)
        line = decode_entities(line)
        anchor, _, title = line.partition(" ")

        # Deal with the Chapters and Sections: the section number is built
        # from the chapter number and the numeric fields of the anchor
        # name after the first one ("03_01" -> ".1").
        if label is None:
            numbers = [str(int(field)) for field in anchor.split("_")[1:]]
            label = prefix + " " + ".".join([chapter] + numbers)

        htmlrefs.append([label, title])
    return htmlrefs


def annotate_labels(lines, htmlrefs):
    """Yield every label line, extended by ", <title>" where one is known.

    Each line is stripped of leading spaces and of newlines; the text
    after its first space is the reference that is looked up in
    *htmlrefs*.  The search for a reference starts at the entry that
    matched last and never goes back.  Lines whose reference is not found,
    or does not start with Chapter/Section/Table/Figure, are yielded
    unchanged.
    """
    start = 0
    for line in lines:
        line = line.lstrip(" ").strip("\n")
        target = line.partition(" ")[2]
        if re4.match(target):
            for j in range(start, len(htmlrefs)):
                if htmlrefs[j][0] == target:
                    line = line + ", " + htmlrefs[j][1]
                    start = j
                    break
        yield line


def main(argv):
    """Read the HTML files below argv[1] and print the annotated ,xref.1."""
    if len(argv) < 2:
        sys.exit("usage: " + argv[0] + " <directory of the HTML files>")

    files = find_chapter_files(argv[1])
    htmlrefs = []
    if files:
        with fileinput.input(files) as stream:
            htmlrefs = collect_htmlrefs(stream)
    else:
        # fileinput.input() would read from stdin for an empty file list.
        print("warning: no V*_chap*.html files found below " + argv[1],
              file=sys.stderr)

    # Get the label names from ,xref.1 and print the results to stdout
    with open(",xref.1") as labels:
        for line in annotate_labels(labels, htmlrefs):
            print(line)


if __name__ == "__main__":
    main(sys.argv)