#!/usr/bin/python3
#
# Copyright © 2016 Dr. Tobias Quathamer
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Regenerate the automatically generated part of the "copyright" file.

The script scans every file matching "../man*/*", reads each header up to
the ".TH" line, and collects the "Copyright" comment lines and the
"SPDX-License-Identifier" tags found there.  Files are grouped by their
Debian license short name.  The "copyright" file in the current directory
is then rewritten: its manually maintained head (everything up to and
including the third line starting with "License:") is kept, followed by
one "Files:" stanza per license group and the license texts read from the
"licenses/" directory.
"""

import re
import sys
import textwrap
from pathlib import Path

# The standard short names in Debian are defined here:
# https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
license_information = [
    {
        "shortname": "BSD-2-clause",
        "filename": "BSD-2-clause",
        "upstream_names": ["BSD_2_CLAUSE_UCB", "BSD-2-Clause"],
    },
    {
        "shortname": "BSD-3-clause",
        "filename": "BSD-3-clause-UCB",
        "upstream_names": ["BSD_3_CLAUSE_UCB", "BSD-3-Clause"],
    },
    {
        "shortname": "BSD-4-clause",
        "filename": "BSD-4-clause-UCB",
        "upstream_names": ["BSD_4_CLAUSE_UCB", "BSD_ONELINE_CDROM", "BSD-4-Clause-UC"],
    },
    {
        "shortname": "Expat",
        "filename": "Expat",
        "upstream_names": ["PERMISSIVE_MISC", "MIT"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "freely-redistributable",
        "filename": "freely-redistributable",
        "upstream_names": ["FREELY_REDISTRIBUTABLE"],
    },
    {
        "shortname": "GPL-1+",
        "filename": "GPL-1+",
        "upstream_names": ["gpl-1-or-later", "GPL-1.0-or-later"],
    },
    {
        "shortname": "GPL-2",
        "filename": "GPL-2",
        "upstream_names": ["GPLv2_MISC", "GPLv2_ONELINE", "GPL-2.0-only"],
    },
    {
        "shortname": "GPL-2+",
        "filename": "GPL-2+",
        "upstream_names": [
            "GPL_NOVERSION_ONELINE",
            "GPLv2+",
            "GPLv2+_DOC_FULL",
            "GPLv2+_DOC_MISC",
            "GPLv2+_DOC_ONEPARA",
            "GPLv2+_SW_3_PARA",
            "GPLv2+_SW_ONEPARA",
            "GPL-2.0-or-later",
        ],
    },
    {
        "shortname": "Linux-man-pages-1-para",
        "filename": "Linux-man-pages-1-para",
        "upstream_names": ["Linux-man-pages-1-para"],
    },
    {
        "shortname": "Linux-man-pages-copyleft-2-para",
        "filename": "Linux-man-pages-copyleft-2-para",
        "upstream_names": ["Linux-man-pages-copyleft-2-para"],
    },
    {
        "shortname": "Linux-man-pages-copyleft",
        "filename": "Linux-man-pages-copyleft",
        "upstream_names": ["Linux-man-pages-copyleft", "Copyleft"],
    },
    {
        "shortname": "Linux-man-pages-copyleft-var",
        "filename": "Linux-man-pages-copyleft-var",
        "upstream_names": ["Linux-man-pages-copyleft-var"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "henry-spencer-regex",
        "filename": "henry-spencer-regex",
        "upstream_names": ["MISC"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "LDPv1",
        "filename": "LDPv1",
        "upstream_names": ["LDPv1"],
    },
    {
        "shortname": "public-domain",
        "filename": "public-domain",
        "upstream_names": ["PUBLIC_DOMAIN"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "verbatim",
        "filename": "verbatim",
        "upstream_names": [
            "VERBATIM",
            "VERBATIM_ONE_PARA",
            "VERBATIM_TWO_PARA",
            "VERBATIM_PROF",
        ],
    },
]

# Maps a (possibly combined) license short name to a dict with the keys
# "files" (manpage paths) and "copyright" (copyright holder lines).
licenses_with_manpages = {}

# Maps the target of a ".so" include to the list of pages that include it.
symlinks = {}

# Patterns applied to every header line; compiled once instead of per line.
_SYMLINK_RE = re.compile(r"^\.so (.*)")
_HEADER_END_RE = re.compile(r"^\.TH")
_COPYRIGHT_RE = re.compile(r"^\.\\\".*?Copyright (.*)")
_SPDX_RE = re.compile(r"^\.\\\" SPDX-License-Identifier: (.+)")
_LICENSE_FIELD_RE = re.compile(r"^License:")


def get_license_shortname(name):
    """Gets the Debian shortname for the name supplied by upstream.

    Includes a check that upstream's names are only assigned to one
    shortname; the script exits with a fatal error otherwise.

    Returns the empty string if the upstream name is not known.
    """
    matches = [
        info["shortname"]
        for info in license_information
        if name in info["upstream_names"]
    ]
    if len(matches) > 1:
        sys.exit("Fatal error: Upstream license name defined multiple times: " + name)
    return matches[0] if matches else ""


def add_manpage_to_shortname(manpage, copyright_holders, licenses):
    """Records a manpage under the short name built from its licenses.

    manpage is the path of the page including the leading "../", which
    gets stripped.  copyright_holders and licenses are the lists collected
    from the page header; neither list is modified or kept by reference.
    """
    # Ensure a string for the filename and strip the leading "../"
    filename = str(manpage)[3:]
    # Common case: only one license for the manpage, so
    # the shortname is just e.g. "GPL-2+".  A license that shows up
    # more than once in a header must not yield "GPL-2+ and GPL-2+".
    shortname = " and ".join(sorted(set(licenses)))
    entry = licenses_with_manpages.get(shortname)
    if entry is None:
        licenses_with_manpages[shortname] = {
            "files": [filename],
            # Do not add same lines twice
            "copyright": sorted(set(copyright_holders)),
        }
        return
    entry["files"].append(filename)
    # Do not add same lines twice
    entry["copyright"] = sorted(set(entry["copyright"]) | set(copyright_holders))


def get_copyright_stanza(shortname, file_info):
    """Gets the "Files:" stanza for one license shortname.

    file_info is a dict with the keys "files" and "copyright".  Pages
    which include one of the files via ".so" are listed as well.
    """
    # Collect files and symlinks into a common list
    all_files = list(file_info["files"])
    for name in file_info["files"]:
        all_files.extend(symlinks.get(name, []))
    # Join the files into a whitespace separated list,
    # at most 76 characters long.
    # The wrap is 69 + 7 (length of "Files: ") = 76
    wrapped = textwrap.wrap(
        " ".join(sorted(all_files)),
        width=69,
        break_long_words=False,
        break_on_hyphens=False,
    )
    files = "\n ".join(wrapped)
    # Now format the copyright holders
    holders = "\n ".join(sorted(file_info["copyright"]))
    # An empty field is an error, so ensure a value
    if not holders:
        holders = "(could not be detected automatically)"
    # Finally, create the stanza
    return (
        "Files: " + files
        + "\nCopyright: " + holders
        + "\nLicense: " + shortname + "\n\n"
    )


def get_license_text(shortname):
    """Gets the text for the Debian license shortname.

    The text is read from the matching file below "licenses/" and every
    line is prefixed with a space.  Returns the empty string if the
    shortname is not listed in license_information.
    """
    parts = []
    for info in license_information:
        if info["shortname"] != shortname:
            continue
        parts.append("License: " + shortname + "\n")
        with open("licenses/" + info["filename"]) as licensefile:
            parts.extend(" " + line for line in licensefile)
    return "".join(parts)


def _scan_manpage(manpage):
    """Parses the header of one manpage and records what was found."""
    licenses = []
    copyright_holders = []
    with manpage.open() as handle:
        for line in handle:
            # Do not create copyright stanzas for symlink files
            # but add them to a symlink list
            symlink = _SYMLINK_RE.search(line)
            if symlink:
                # Ensure a string and strip the leading "../"
                linkname = str(manpage)[3:]
                symlinks.setdefault(symlink.group(1), []).append(linkname)
                return
            # Only parse the header, so stop after seeing ".TH"
            if _HEADER_END_RE.search(line):
                break
            # Extract all copyright holders
            holder = _COPYRIGHT_RE.search(line)
            if holder:
                copyright_holders.append(holder.group(1))
            # Match the license tag
            spdx = _SPDX_RE.search(line)
            if spdx:
                license_name = spdx.group(1)
                license_short_name = get_license_shortname(license_name)
                if not license_short_name:
                    sys.exit(
                        "Fatal error: Upstream license name not known: " + license_name
                    )
                licenses.append(license_short_name)
    add_manpage_to_shortname(manpage, copyright_holders, licenses)


def _flatten_symlinks():
    """Adds the pages which include a symlink to the symlink's own target."""
    for link_to_test in symlinks:
        # Now cycle through all symlink entries
        for link in symlinks:
            if link_to_test in symlinks[link]:
                symlinks[link] = symlinks[link] + symlinks[link_to_test]


def _read_manual_header(path):
    """Reads the manually maintained head of the copyright file."""
    # Read in the first lines of copyright, without
    # the automatically generated parts. Stop after
    # the third occurrence of "License".
    manual_lines = []
    license_line_count = 0
    with open(path) as copyright_file:
        for line in copyright_file:
            manual_lines.append(line)
            if _LICENSE_FIELD_RE.search(line):
                license_line_count += 1
            if license_line_count == 3:
                # Add a final newline for separation
                manual_lines.append("\n")
                break
    return "".join(manual_lines)


def main():
    """Scans the manpages and rewrites the "copyright" file."""
    for manpage in Path("..").glob("man*/*"):
        _scan_manpage(manpage)

    # Flatten the symlinks by detecting a symlink
    # which points to another symlink.
    _flatten_symlinks()

    # Make sorting of licenses deterministic
    stanzas = []
    license_texts = []
    for shortname in sorted(licenses_with_manpages):
        stanzas.append(
            get_copyright_stanza(shortname, licenses_with_manpages[shortname])
        )
        text = get_license_text(shortname)
        if text:
            license_texts.append(text)

    # The head has to be read completely before the file is truncated
    manual_lines = _read_manual_header("copyright")

    # Open the file for output
    with open("copyright", "w") as copyright_file:
        copyright_file.write(manual_lines)
        copyright_file.write("".join(stanzas))
        copyright_file.write("\n".join(license_texts))


if __name__ == "__main__":
    main()