#!/usr/bin/python3
#
# Copyright © 2016 Dr. Tobias Quathamer
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Regenerate the automatically generated part of the "copyright" file.

The script scans the headers of all manpages matching "../man?/*",
groups them by license, and rewrites the file "copyright" in the
current directory: the manually maintained beginning of the file is
kept, followed by one "Files:" stanza per license (combination) and
the license texts read from the "licenses/" directory.
"""

import re
import sys
import textwrap
from pathlib import Path

# The standard short names in Debian are defined here:
# https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
license_information = [
    {
        'shortname': 'BSD-3-clause',
        'filename': 'BSD-3-clause-UCB',
        'upstream_names': ['BSD_3_CLAUSE_UCB'],
    },
    {
        'shortname': 'BSD-4-clause',
        'filename': 'BSD-4-clause-UCB',
        'upstream_names': ['BSD_4_CLAUSE_UCB', 'BSD_ONELINE_CDROM'],
    },
    {
        'shortname': 'Expat',
        'filename': 'Expat',
        'upstream_names': ['PERMISSIVE_MISC', 'MIT'],
    },
    {
        # This shortname is not defined by the standard.
        'shortname': 'freely-redistributable',
        'filename': 'freely-redistributable',
        'upstream_names': ['FREELY_REDISTRIBUTABLE'],
    },
    {
        'shortname': 'GPL-2',
        'filename': 'GPL-2',
        'upstream_names': ['GPLv2_MISC', 'GPLv2_ONELINE'],
    },
    {
        'shortname': 'GPL-2+',
        'filename': 'GPL-2+',
        'upstream_names': ['GPL_NOVERSION_ONELINE',
                           'GPLv2+',
                           'GPLv2+_DOC_FULL',
                           'GPLv2+_DOC_MISC',
                           'GPLv2+_DOC_ONEPARA',
                           'GPLv2+_SW_3_PARA',
                           'GPLv2+_SW_ONEPARA'],
    },
    {
        # This shortname is not defined by the standard.
        'shortname': 'henry-spencer-regex',
        'filename': 'henry-spencer-regex',
        'upstream_names': ['MISC'],
    },
    {
        # This shortname is not defined by the standard.
        'shortname': 'LDPv1',
        'filename': 'LDPv1',
        'upstream_names': ['LDPv1'],
    },
    {
        'shortname': 'public-domain',
        'filename': 'public-domain',
        'upstream_names': ['PUBLIC_DOMAIN'],
    },
    {
        # This shortname is not defined by the standard.
        'shortname': 'verbatim',
        'filename': 'verbatim',
        'upstream_names': ['VERBATIM',
                           'VERBATIM_ONE_PARA',
                           'VERBATIM_TWO_PARA',
                           'VERBATIM_PROF'],
    },
]

# Maps a license shortname (or several joined by " and ") to a dict
# with the keys 'files' (list of manpage names) and 'copyright'
# (list of copyright lines).
licenses_with_manpages = {}

# Maps the target of a ".so" include to the list of manpages
# which consist only of that include.
symlinks = {}

# Header patterns, compiled once because they are applied to every
# header line of every manpage.
_SYMLINK_RE = re.compile(r"^\.so (.*)")
_COPYRIGHT_RE = re.compile(r"^\.\\\".*?Copyright (.*)")
_LICENSE_START_RE = re.compile(r"^\.\\\" %%%LICENSE_START\((.*)\)")

# The copyright file, the license texts and the copyright holders
# taken from the manpages may contain non-ASCII characters. Use one
# fixed encoding instead of the locale default, so that the result
# does not depend on the environment the script is run in.
# Debian's machine-readable copyright format specifies UTF-8.
_ENCODING = "utf-8"


def _strip_parent_prefix(path):
    """Returns path as a string without its leading "../"."""
    # The first three characters are dropped unconditionally;
    # callers are expected to pass paths below "..".
    return str(path)[3:]


def get_license_shortname(name):
    """Gets the Debian shortname for the name supplied by upstream.

    Includes a check that upstream's names are only assigned
    to one shortname. Returns an empty string if the name is
    unknown and exits the program if the name is assigned to
    more than one shortname.
    """
    matches = [info['shortname'] for info in license_information
               if name in info['upstream_names']]
    if len(matches) > 1:
        sys.exit("Fatal error: Upstream license name defined multiple times: " + name)
    return matches[0] if matches else ""


def add_manpage_to_shortname(manpage, copyright_holders, licenses):
    """Registers a manpage in licenses_with_manpages.

    manpage is the path of the manpage (with a leading "../"),
    copyright_holders the list of copyright lines found in it,
    and licenses the list of Debian license shortnames found in it.
    """
    filename = _strip_parent_prefix(manpage)
    # Common case: only one license for the manpage, so
    # the shortname is just e.g. "GPL-2+"
    # NOTE(review): a manpage without any license marker ends up
    # under the empty shortname "" -- confirm that this is intended.
    shortname = " and ".join(sorted(licenses))
    entry = licenses_with_manpages.setdefault(
        shortname, {'files': [], 'copyright': []})
    entry['files'].append(filename)
    # Do not add same lines twice. This also covers duplicates within
    # a single manpage, and the result is a fresh list, so the list
    # of the caller is never shared with the stored entry.
    entry['copyright'] = sorted(set(entry['copyright']) | set(copyright_holders))


def get_copyright_stanza(shortname, file_info):
    """Gets the "Files:" stanza for one entry of licenses_with_manpages.

    file_info is a dict with the keys 'files' and 'copyright'.
    The returned stanza ends with an empty line.
    """
    # Collect files and symlinks into a common list
    all_files = list(file_info['files'])
    for name in file_info['files']:
        all_files.extend(symlinks.get(name, []))
    # Join the files into a whitespace separated list,
    # at most 76 characters long
    # The wrap is 69 + 7 (length of "Files: ") = 76
    wrapped = textwrap.wrap(" ".join(sorted(all_files)),
                            width=69,
                            break_long_words=False,
                            break_on_hyphens=False)
    files = "\n ".join(wrapped)
    # Now format the copyright holders, without repeated lines
    holders = "\n ".join(sorted(set(file_info['copyright'])))
    # An empty field is an error, so ensure a value
    if not holders:
        holders = "(could not be detected automatically)"
    # Finally, create the stanza
    return "".join([
        "Files: " + files,
        "\nCopyright: " + holders,
        "\nLicense: " + shortname + "\n\n",
    ])


def get_license_text(shortname):
    """Gets the text for the Debian license shortname.

    The text is read from the matching file in "licenses/" and
    every line is indented by one space. Returns an empty string
    if the shortname is not listed in license_information, which
    is the case for combinations like "GPL-2 and verbatim".
    """
    parts = []
    for info in license_information:
        if info['shortname'] == shortname:
            parts.append("License: " + shortname + "\n")
            with open("licenses/" + info['filename'],
                      encoding=_ENCODING) as licensefile:
                parts.extend(" " + line for line in licensefile)
    return "".join(parts)


def _parse_manpage(manpage):
    """Reads the header of one manpage and records what was found.

    A manpage with a ".so" line is added to symlinks; any other
    manpage is passed to add_manpage_to_shortname(). Exits the
    program if an unknown upstream license name is encountered.
    """
    licenses = []
    copyright_holders = []
    with manpage.open(encoding=_ENCODING) as manfile:
        for line in manfile:
            # Do not create copyright stanzas for symlink files
            # but add them to a symlink list
            symlink = _SYMLINK_RE.search(line)
            if symlink:
                linkname = _strip_parent_prefix(manpage)
                symlinks.setdefault(symlink.group(1), []).append(linkname)
                return
            # Only parse the header, so stop after seeing ".TH"
            if line.startswith(".TH"):
                break
            # Extract all copyright holders
            holder = _COPYRIGHT_RE.search(line)
            if holder:
                copyright_holders.append(holder.group(1))
            # Match the beginning of the license
            license_start = _LICENSE_START_RE.search(line)
            if license_start:
                license_name = license_start.group(1)
                license_short_name = get_license_shortname(license_name)
                if not license_short_name:
                    sys.exit("Fatal error: Upstream license name not known: " + license_name)
                licenses.append(license_short_name)
    add_manpage_to_shortname(manpage, copyright_holders, licenses)


def _flatten_symlinks():
    """Resolves symlinks which point to another symlink.

    If a symlink is itself the target of other symlinks, those
    are appended to the list of every target it is listed under.
    No entry is removed from symlinks.
    """
    for link_to_test in symlinks:
        # Now cycle through all symlink entries. Only values are
        # replaced, so iterating over the dict meanwhile is safe.
        for link in symlinks:
            if link_to_test in symlinks[link]:
                symlinks[link] = symlinks[link] + symlinks[link_to_test]


def _read_manual_header(path):
    """Gets the manually maintained beginning of the copyright file.

    Reads up to and including the third line starting with
    "License:" and appends a newline for separation. If there
    are fewer such lines, the whole file is returned unchanged.
    """
    kept = []
    license_line_count = 0
    with open(path, encoding=_ENCODING) as copyright_file:
        for line in copyright_file:
            kept.append(line)
            if line.startswith("License:"):
                license_line_count += 1
                if license_line_count == 3:
                    # Add a final newline for separation
                    kept.append("\n")
                    break
    return "".join(kept)


def main():
    """Scans all manpages and rewrites the file "copyright"."""
    for manpage in Path("..").glob("man?/*"):
        _parse_manpage(manpage)

    _flatten_symlinks()

    # Make sorting of licenses deterministic
    stanzas = []
    license_texts = []
    for shortname in sorted(licenses_with_manpages):
        stanzas.append(
            get_copyright_stanza(shortname, licenses_with_manpages[shortname]))
        text = get_license_text(shortname)
        if text:
            license_texts.append(text)

    # Read in the first lines of copyright, without
    # the automatically generated parts. Stop after
    # the third occurrence of "License". This has to be
    # finished before the file is opened for writing.
    manual_lines = _read_manual_header("copyright")

    # Open the file for output
    with open("copyright", "w", encoding=_ENCODING) as copyright_file:
        copyright_file.write(manual_lines)
        copyright_file.write("".join(stanzas))
        copyright_file.write("\n".join(license_texts))


if __name__ == "__main__":
    main()