diff --git a/README.md b/README.md index 3d3db933ac..ddd71eeeb2 100644 --- a/README.md +++ b/README.md @@ -311,10 +311,13 @@ ### Standalone Py2Exe Builds (Windows) ### Related scripts -* **`devscripts/update-version.py [revision]`** - Update the version number based on current date -* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable +* **`devscripts/update-version.py`** - Update the version number based on current date. +* **`devscripts/set-variant.py`** - Set the build variant of the executable. +* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. +Note: See their `--help` for more info. + You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release # USAGE AND OPTIONS diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 0000000000..0967ef424b --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1 @@ +{} diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 0000000000..9bd747b701 --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 index 0000000000..b66181b53d --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,491 @@ +from __future__ import annotations + +import enum +import itertools +import json +import logging +import re +import subprocess +import sys +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + UPSTREAM = None + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + MISC = 'Misc.' + + @classmethod + @lru_cache + def commit_lookup(cls): + return { + name: group + for group, names in { + cls.PRIORITY: {''}, + cls.UPSTREAM: {'upstream'}, + cls.CORE: { + 'aes', + 'cache', + 'compat_utils', + 'compat', + 'cookies', + 'core', + 'dependencies', + 'jsinterp', + 'outtmpl', + 'plugins', + 'update', + 'utils', + }, + cls.MISC: { + 'build', + 'cleanup', + 'devscripts', + 'docs', + 'misc', + 'test', + }, + cls.EXTRACTOR: {'extractor', 'extractors'}, + cls.DOWNLOADER: {'downloader'}, + cls.POSTPROCESSOR: {'postprocessor'}, + }.items() + for name in names + } + + @classmethod + def get(cls, value): + result = cls.commit_lookup().get(value) + if result: + logger.debug(f'Mapped {value!r} => {result.name}') + return result + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:7]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] + message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + + def __init__(self, groups, repo): + self._groups = groups + self._repo = repo + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + for item in CommitGroup: + group = groups[item] + if group: + yield self.format_module(item.value, group) + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for details, items in detail_groups: + if not details: + indent = '' + else: + yield f'- {details}' + indent = '\t' + + if details == 'cleanup': + items, cleanup_misc_items = self._filter_cleanup_misc_items(items) + + sub_detail_groups = itertools.groupby(items, lambda item: item.sub_details) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{indent}- {self.format_single_change(entry)}' + continue + + prefix = f'{indent}- {", ".join(sub_details)}' + entries = list(entries) + if len(entries) == 1: + yield f'{prefix}: {self.format_single_change(entries[0])}' + continue + + yield prefix + for entry in entries: + yield f'{indent}\t- {self.format_single_change(entry)}' + + if details == 'cleanup' and cleanup_misc_items: + yield from self._format_cleanup_misc_sub_group(cleanup_misc_items) + + def _filter_cleanup_misc_items(self, items): + cleanup_misc_items = defaultdict(list) + non_misc_items = [] + for item in items: + if self.MISC_RE.search(item.message): + cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + non_misc_items.append(item) + + return non_misc_items, cleanup_misc_items + + def _format_cleanup_misc_sub_group(self, group): + prefix = '\t- Miscellaneous' + if len(group) == 1: + yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}' + return + + yield prefix + for message in self._format_cleanup_misc_items(group): + yield f'\t\t- {message}' + + def _format_cleanup_misc_items(self, group): + for authors, infos in group.items(): + message = ', '.join( + self._format_message_link(None, info.commit.hash) + for info in sorted(infos, key=lambda item: item.commit.hash or '')) + yield f'{message} by {self._format_authors(authors)}' + + def format_single_change(self, info): + message = self._format_message_link(info.message, info.commit.hash) + if info.issues: + message = f'{message} ({self._format_issues(info.issues)})' + + if info.commit.authors: + message = f'{message} by {self._format_authors(info.commit.authors)}' + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = f'{message} (With fixes in {fix_message})' + + return message + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined commit message or override' + message = message if message else hash[:7] + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[ + (?P[^\]\/:,]+) + (?:/(?P
[^\]:,]+))? + (?:[:,](?P[^\]]+))? + \]\ )? + (?:`?(?P[^:`]+)`?: )? + (?P.+?) + (?:\ \((?P\#\d+(?:,\ \#\d+)*)\))? + ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+for)?|Revert)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None) -> None: + self._start = start + self._end = end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + @classmethod + def from_single(cls, commitish='HEAD', default_author=None): + start_commitish = cls.get_prev_tag(commitish) + end_commitish = cls.get_next_tag(commitish) + if start_commitish == end_commitish: + start_commitish = cls.get_prev_tag(f'{commitish}~') + logger.info(f'Determined range from {commitish!r}: {start_commitish}..{end_commitish}') + return cls(start_commitish, end_commitish, default_author) + + @classmethod + def get_prev_tag(cls, commitish): + command = [cls.COMMAND, 'describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish] + return subprocess.check_output(command, text=True).strip() + + @classmethod + def get_next_tag(cls, commitish): + result = subprocess.run( + [cls.COMMAND, 'describe', '--contains', '--abbrev=0', commitish], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) + if result.returncode: + return 'HEAD' + + return result.stdout.partition('~')[0].strip() + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _is_ancestor(self, commitish): + return bool(subprocess.call( + [self.COMMAND, 'merge-base', '--is-ancestor', commitish, self._start])) + + def _get_commits_and_fixes(self, default_author): + result = subprocess.check_output([ + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}'], text=True) + + commits = {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for line in lines: + commit_hash = line + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip: + logger.debug(f'Skipped commit: {commit}') + continue + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:7] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:7]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:7]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r}, not in commits {self._start!r}') + continue + + override_hash = override.get('hash') + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], override['authors']) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + groups = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short) + if upstream_re: + commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, details, sub_details, sub_details_alt, message, issues = match.groups() + group = None + if prefix: + if prefix == 'priority': + prefix, _, details = (details or '').partition('/') + logger.debug(f'Priority: {message!r}') + group = CommitGroup.PRIORITY + + if not details and prefix: + if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'): + logger.debug(f'Replaced details with {prefix!r}') + details = prefix or None + + if details == 'common': + details = None + + if details: + details = details.strip() + + else: + group = CommitGroup.CORE + + sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.lower().replace(':', ',') + sub_details = tuple(filter(None, map(str.strip, sub_details.split(',')))) + + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if not group: + group = CommitGroup.get(prefix.lower()) + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + else: + group = CommitGroup.POSTPROCESSOR + logger.warning(f'Failed to map {commit.short!r}, selected {group.name}') + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') + groups[group].append(commit_info) + + return groups + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + with contributors_path.open() as file: + for line in filter(None, map(str.strip, file)): + author, _, _ = line.partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without a author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='yt-dlp/yt-dlp', + help='the github repository to use for the operations (default: %(default)s)') + args = parser.parse_args() + + logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange.from_single(args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + with args.override_path.open() as file: + overrides = json.load(file) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + with args.contributors_path.open('a') as file: + file.writelines(f'{contributor}\n' for contributor in new_contributors) + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo))