diff --git a/lefthook.yml b/lefthook.yml
index 04448887bb0..295528cc711 100644
--- a/lefthook.yml
+++ b/lefthook.yml
@@ -31,3 +31,8 @@ pre-commit:
       files: git diff --name-only --staged
       glob: "{Gemfile.lock,frontend/package.json}"
       run: script/check_same_primer_view_components_version_everywhere
+    sort-locales:
+      files: git diff --name-only --staged
+      glob: "**/config/locales/{en,js-en}.yml"
+      run: python3 script/i18n/sort_locales.py {files}
+      stage_fixed: true
diff --git a/script/i18n/requirements.txt b/script/i18n/requirements.txt
new file mode 100644
index 00000000000..800deb50de9
--- /dev/null
+++ b/script/i18n/requirements.txt
@@ -0,0 +1,6 @@
+# Runtime dependency for script/i18n/sort_locales.py and the lefthook pre-commit hook.
+# Install with: pip install -r script/i18n/requirements.txt
+ruamel.yaml==0.18.10
+
+# Running the tests additionally needs pytest (a dev-only tool, intentionally
+# not pinned here): pip install pytest && python3 -m pytest script/i18n/
diff --git a/script/i18n/sort_locales.py b/script/i18n/sort_locales.py
new file mode 100755
index 00000000000..777aedb4c14
--- /dev/null
+++ b/script/i18n/sort_locales.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""Sort keys in OpenProject locale YAML files to satisfy yamllint's key-ordering.
+
+Sorts mapping keys recursively in Unicode codepoint order (matching yamllint's
+strcoll comparison under CI's C/POSIX locale for the ASCII keys these files use).
+Sequences and scalar values are left untouched. Comments, quoting and block
+scalars are preserved via ruamel.yaml round-trip.
+
+Usage:
+    python3 script/i18n/sort_locales.py FILE [FILE ...]
+"""
+from __future__ import annotations
+
+import io
+import sys
+from pathlib import Path
+
+try:
+    from ruamel.yaml import YAML
+    from ruamel.yaml.comments import CommentedMap, CommentedSeq
+except ModuleNotFoundError:
+    sys.stderr.write(
+        "sort_locales.py requires ruamel.yaml. Install it with:\n"
+        "  pip install -r script/i18n/requirements.txt\n"
+    )
+    sys.exit(1)
+
+
+def make_yaml() -> YAML:
+    """Return a ruamel YAML instance configured for the locale files.
+
+    Original quoting is kept, lines are effectively never re-wrapped
+    (width 4096), mappings indent by 2 and sequences by 4 with a dash offset of 2.
+    """
+    yaml = YAML()
+    yaml.preserve_quotes = True
+    yaml.width = 4096
+    yaml.indent(mapping=2, sequence=4, offset=2)
+    return yaml
+
+
+def _sort_key(key):
+    """Return the YAML scalar text yamllint compares against.
+
+    ruamel parses unquoted true/false/null as Python objects, but yamllint
+    compares the literal scalar text, so map them back to what gets written.
+    """
+    if isinstance(key, bool):
+        return "true" if key else "false"
+    if key is None:
+        return "null"
+    return str(key)
+
+
+def _bare(comment_line: str) -> str:
+    """'# foo' -> 'foo', '#foo' -> 'foo', '#' -> ''. ruamel's comment APIs re-add '# '."""
+    s = comment_line.lstrip()[1:]  # drop leading whitespace and the '#'
+    return s[1:] if s.startswith(" ") else s
+
+
+def _split_post(value: str):
+    """Split a ruamel post-comment token value into (eol, [own_lines]).
+
+    `eol` is the comment on the same line as the key's value (or None);
+    `own_lines` are the own-line comments that follow it (which visually
+    precede the next key). All returned text is bare (no leading '#').
+    """
+    segments = value.split("\n")
+    eol = None
+    own_lines = []
+    starts_inline = not value.startswith("\n")
+    for index, segment in enumerate(segments):
+        stripped = segment.strip()
+        if not stripped.startswith("#"):
+            continue
+        if index == 0 and starts_inline:
+            eol = _bare(stripped)
+        else:
+            own_lines.append(_bare(stripped))
+    return eol, own_lines
+
+
+def reanchor_comments(node, child_indent: int = 0) -> None:
+    """Re-attach own-line comments to the key they precede, so they travel
+    with that key when keys are reordered. Run before sort_node."""
+    if isinstance(node, CommentedMap):
+        for key in list(node.keys()):
+            reanchor_comments(node[key], child_indent + 2)
+
+        keys = list(node.keys())
+
+        # NOTE: we deliberately do NOT touch a mapping's leading comment
+        # (node.ca.comment). For nested mappings ruamel also stores that comment
+        # on the parent's `ca.items[key]`, which travels with the key on reorder;
+        # moving it here would render it twice. The only unanchored leading
+        # comment is the root document header, whose mapping has a single key
+        # (`en`) and never reorders.
+
+        # Each key's following own-line comments -> before the next key.
+        for index, key in enumerate(keys):
+            item = node.ca.items.get(key)
+            if not item or item[2] is None:
+                continue
+            eol, own_lines = _split_post(item[2].value)
+            if not own_lines or index + 1 >= len(keys):
+                continue  # nothing to move, or trailing comments at mapping end
+            node.ca.items[key][2] = None
+            if eol is not None:
+                node.yaml_add_eol_comment(eol, key)
+            node.yaml_set_comment_before_after_key(
+                keys[index + 1], before="\n".join(own_lines), indent=child_indent)
+
+    elif isinstance(node, CommentedSeq):
+        for item in node:
+            reanchor_comments(item, child_indent + 2)
+
+
+def sort_node(node) -> None:
+    """Recursively sort mapping keys in place by codepoint order."""
+    if isinstance(node, CommentedMap):
+        for key in list(node.keys()):
+            sort_node(node[key])
+        for key in sorted(node.keys(), key=_sort_key):
+            node.move_to_end(key)
+    elif isinstance(node, CommentedSeq):
+        for item in node:
+            sort_node(item)
+
+
+def flatten(node, prefix=()):
+    """Yield (path, value) for every leaf; order-independent."""
+    if isinstance(node, dict):
+        for key, value in node.items():
+            yield from flatten(value, prefix + (str(key),))
+    elif isinstance(node, list):
+        for index, value in enumerate(node):
+            yield from flatten(value, prefix + (f"[{index}]",))
+    else:
+        yield prefix, node
+
+
+def _normalize_eol_comment_spacing(text: str) -> str:
+    """Ensure at least 2 spaces before end-of-line comments (yamllint's
+    comments rule). Comment positions are taken from a ruamel parse, so a
+    '#' inside a string value is never touched."""
+    data = make_yaml().load(text)
+    targets = []  # (0-based line, 0-based column of '#')
+
+    def visit(node):
+        ca = getattr(node, "ca", None)
+        if ca is not None:
+            for _key, item in ca.items.items():
+                token = item[2] if item else None
+                if token is not None and not token.value.startswith("\n"):
+                    targets.append((token.start_mark.line, token.start_mark.column))
+        if isinstance(node, dict):
+            for value in node.values():
+                visit(value)
+        elif isinstance(node, list):
+            for value in node:
+                visit(value)
+
+    if data is not None:
+        visit(data)
+
+    lines = text.split("\n")
+    for line_no, col in targets:
+        if line_no >= len(lines):
+            continue
+        line = lines[line_no]
+        # skip positions that do not point at a '#' inside this line
+        if col < 1 or col >= len(line) or line[col] != "#":
+            continue
+        start = col
+        while start > 0 and line[start - 1] == " ":
+            start -= 1
+        # only an end-of-line comment (content precedes the spaces), under-spaced
+        if start > 0 and (col - start) < 2:
+            lines[line_no] = line[:start] + "  " + line[col:]
+    return "\n".join(lines)
+
+
+def sort_file(path: str) -> None:
+    """Sort the locale file at `path` in place.
+
+    Everything before the root `en:` line is written back verbatim. Files
+    without such a line, or whose body parses to nothing, are left untouched.
+    The file is read and written as UTF-8 regardless of the platform default.
+
+    Raises SystemExit if the flattened (path, value) leaves differ after sorting.
+    """
+    yaml = make_yaml()
+    text = Path(path).read_text(encoding="utf-8")
+
+    # Preserve everything up to the root `en:` line verbatim: the license
+    # header and any `---` document-start marker. ruamel does not reliably
+    # round-trip pre-document leading comments, so we never hand them to it.
+    lines = text.splitlines(keepends=True)
+    body_start = next(
+        (i for i, line in enumerate(lines) if line.rstrip("\n") == "en:"), None)
+    if body_start is None:
+        return  # no recognizable root mapping; leave untouched
+    preamble = "".join(lines[:body_start])
+    body = "".join(lines[body_start:])
+
+    data = yaml.load(body)
+    if data is None:
+        return
+
+    before = dict(flatten(data))
+    reanchor_comments(data)
+    sort_node(data)
+    after = dict(flatten(data))
+    if before != after:
+        raise SystemExit(
+            f"{path}: refusing to write — sorting changed content, not just order"
+        )
+
+    buffer = io.StringIO()
+    yaml.dump(data, buffer)
+    body_out = _normalize_eol_comment_spacing(buffer.getvalue())
+    body_out = body_out.rstrip("\n") + "\n"  # exactly one trailing newline
+    Path(path).write_text(preamble + body_out, encoding="utf-8")
+
+
+def main(argv: list[str]) -> int:
+    """Sort every file named in argv[1:]; return the exit status (always 0)."""
+    for path in argv[1:]:
+        sort_file(path)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/script/i18n/test_sort_locales.py b/script/i18n/test_sort_locales.py
new file mode 100644
index 00000000000..a79e2f473fa
--- /dev/null
+++ b/script/i18n/test_sort_locales.py
@@ -0,0 +1,222 @@
+import sys
+from pathlib import Path
+
+import pytest
+from ruamel.yaml import YAML
+from ruamel.yaml.constructor import DuplicateKeyError
+
+sys.path.insert(0, str(Path(__file__).parent))
+import sort_locales  # noqa: E402
+
+
+def run_sort(tmp_path, text):
+    f = tmp_path / "en.yml"
+    f.write_text(text, encoding="utf-8")
+    sort_locales.sort_file(str(f))
+    return f.read_text(encoding="utf-8")
+
+
+def test_sorts_top_level_and_nested_keys(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  banana: \"B\"\n"
+        "  apple:\n"
+        "    zebra: 2\n"
+        "    aardvark: 1\n"
+    ))
+    assert out.index("apple:") < out.index("banana:")
+    assert out.index("aardvark:") < out.index("zebra:")
+
+
+def test_own_line_comment_before_non_first_key_moves_with_it(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  zebra: 1\n"
+        "  # note about alpha\n"
+        "  alpha: 2\n"
+    ))
+    assert out.index("alpha:") < out.index("zebra:")
+    # the comment travels with alpha and stays directly above it
+    assert "# note about alpha\n  alpha:" in out
+
+
+def test_leading_comment_before_first_key_stays_at_block_top(tmp_path):
+    # Documented behavior: a comment before the FIRST key of a mapping is
+    # treated as a block header and stays at the top after sorting, rather
+    # than following its original first key. (Dedent/first-key comments are
+    # not auto-relocated; they're hand-fixed during the one-time sort.)
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  # block header\n"
+        "  beta: 2\n"
+        "  alpha: 1\n"
+    ))
+    assert out.index("alpha:") < out.index("beta:")
+    assert out.index("# block header") < out.index("alpha:")
+
+
+def test_sorts_quoted_keys_by_unquoted_value(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  \"zzz\": 1\n"
+        "  \"import/jira\": 2\n"
+        "  aaa: 3\n"
+    ))
+    # codepoint order: aaa (97) < import/jira (105) < zzz (122)
+    assert out.index("aaa:") < out.index("import/jira") < out.index("zzz")
+    assert '"import/jira"' in out  # original quoting preserved
+
+
+def test_preserves_block_scalars(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  zebra: \"Z\"\n"
+        "  alpha: |\n"
+        "    multi\n"
+        "    line\n"
+    ))
+    assert out.index("alpha:") < out.index("zebra:")
+    assert "|" in out
+    assert "    multi\n    line" in out
+
+
+def test_multiline_comment_block_moves_with_following_key(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  zebra: 1\n"
+        "  # explains alpha line 1\n"
+        "  # explains alpha line 2\n"
+        "  alpha: 2\n"
+    ))
+    # the whole block stays directly above alpha, which sorts first
+    assert "# explains alpha line 1\n  # explains alpha line 2\n  alpha:" in out
+    assert out.index("alpha:") < out.index("zebra:")
+
+
+def test_eol_comment_stays_with_its_key_and_own_line_moves(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  zebra: 1  # eol on zebra\n"
+        "  # describes alpha\n"
+        "  alpha: 2\n"
+    ))
+    # eol comment remains on zebra's line; own-line comment moves above alpha
+    assert "zebra: 1  # eol on zebra" in out
+    assert "# describes alpha\n  alpha:" in out
+    assert out.index("alpha:") < out.index("zebra:")
+
+
+def _load(text):
+    return YAML().load(text)
+
+
+def test_preserves_all_key_paths_and_values(tmp_path):
+    src = (
+        "en:\n"
+        "  user:\n"
+        "    display_format: \"Display format\"\n"
+        "    deletion: \"Deletion\"\n"
+        "  activities:\n"
+        "    index:\n"
+        "      title: \"T\"\n"
+    )
+    out = run_sort(tmp_path, src)
+    before = dict(sort_locales.flatten(_load(src)))
+    after = dict(sort_locales.flatten(_load(out)))
+    assert before == after  # same key-paths and values, order aside
+
+
+def test_duplicate_keys_raise(tmp_path):
+    with pytest.raises(DuplicateKeyError):
+        run_sort(tmp_path, (
+            "en:\n"
+            "  alpha: 1\n"
+            "  alpha: 2\n"
+        ))
+
+
+def _assert_keys_sorted(node):
+    if isinstance(node, dict):
+        keys = [sort_locales._sort_key(k) for k in node.keys()]
+        assert keys == sorted(keys), f"unsorted mapping: {keys}"
+        for value in node.values():
+            _assert_keys_sorted(value)
+    elif isinstance(node, list):
+        for value in node:
+            _assert_keys_sorted(value)
+
+
+def test_output_is_yamllint_ordered(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  gamma: 3\n"
+        "  alpha:\n"
+        "    delta: 1\n"
+        "    beta: 2\n"
+        "  bool_keys:\n"
+        "    true: t\n"
+        "    false: f\n"
+    ))
+    _assert_keys_sorted(_load(out))
+    # boolean keys sort as written: false before true
+    assert out.index("false:") < out.index("true:")
+
+
+def test_preserves_license_header_and_document_marker(tmp_path):
+    header = (
+        "#-- copyright\n"
+        "# OpenProject is an open source project management software.\n"
+        "#++\n"
+        "\n"
+        "---\n"
+    )
+    out = run_sort(tmp_path, header + (
+        "en:\n"
+        "  zebra: 1\n"
+        "  alpha: 2\n"
+    ))
+    # header + marker preserved verbatim and still at the very top
+    assert out.startswith(header)
+    assert out.index("alpha:") < out.index("zebra:")
+
+
+def test_single_trailing_newline(tmp_path):
+    out = run_sort(tmp_path, "en:\n  b: 1\n  a: 2\n\n\n")
+    assert out.endswith("\n")
+    assert not out.endswith("\n\n")
+
+
+def test_normalizes_eol_comment_spacing(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  zebra: \"Z\" # one space before comment\n"
+        "  alpha: \"a # b is not a comment\"\n"
+    ))
+    # the real eol comment gets two spaces; the '#' inside the string is untouched
+    assert '"Z"  # one space before comment' in out
+    assert '"a # b is not a comment"' in out
+    assert out.index("alpha:") < out.index("zebra:")
+
+
+def test_idempotent(tmp_path):
+    src = (
+        "en:\n"
+        "  gamma: 3\n"
+        "  # note for alpha\n"
+        "  alpha: 1\n"
+        "  beta: 2\n"
+    )
+    once = run_sort(tmp_path, src)
+    twice = run_sort(tmp_path, once)
+    assert once == twice
+
+
+def test_preserves_non_ascii_values(tmp_path):
+    out = run_sort(tmp_path, (
+        "en:\n"
+        "  zebra: \"Zoë\"\n"
+        "  alpha: \"café\"\n"
+    ))
+    assert out.index("alpha:") < out.index("zebra:")
+    assert '"Zoë"' in out
+    assert '"café"' in out