mirror of
https://github.com/opf/openproject.git
synced 2026-06-14 03:30:14 +00:00
feat(i18n): add locale key sorter and pre-commit hook
Add script/i18n/sort_locales.py (ruamel.yaml round-trip) that sorts mapping keys into yamllint key-ordering while preserving comments, quoting, block scalars and the license header. It splits off the document header verbatim, re-anchors own-line comments to the key they precede, normalizes end-of-line comment spacing to >= 2 spaces, sorts by the literal YAML scalar text (so true/false keys match yamllint), and refuses to write unless key-paths and values are unchanged. Wire it into a lefthook pre-commit hook so the yamllint-checked locale files stay sorted; the existing reviewdog yamllint check remains the CI gate. requirements.txt pins only the runtime dependency (ruamel.yaml); pytest is a dev-only tool and is not pinned. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,3 +31,8 @@ pre-commit:
|
||||
files: git diff --name-only --staged
|
||||
glob: "{Gemfile.lock,frontend/package.json}"
|
||||
run: script/check_same_primer_view_components_version_everywhere
|
||||
sort-locales:
|
||||
files: git diff --name-only --staged
|
||||
glob: "**/config/locales/{en,js-en}.yml"
|
||||
run: python3 script/i18n/sort_locales.py {files}
|
||||
stage_fixed: true
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# Runtime dependency for script/i18n/sort_locales.py and the lefthook pre-commit hook.
|
||||
# Install with: pip install -r script/i18n/requirements.txt
|
||||
ruamel.yaml==0.18.10
|
||||
|
||||
# Running the tests additionally needs pytest (a dev-only tool, intentionally
|
||||
# not pinned here): pip install pytest && python3 -m pytest script/i18n/
|
||||
Executable
+219
@@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Sort keys in OpenProject locale YAML files to satisfy yamllint's key-ordering.
|
||||
|
||||
Sorts mapping keys recursively in Unicode codepoint order (matching yamllint's
|
||||
strcoll comparison under CI's C/POSIX locale for the ASCII keys these files use).
|
||||
Sequences and scalar values are left untouched. Comments, quoting and block
|
||||
scalars are preserved via ruamel.yaml round-trip.
|
||||
|
||||
Usage:
|
||||
python3 script/i18n/sort_locales.py FILE [FILE ...]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from ruamel.yaml import YAML
|
||||
from ruamel.yaml.comments import CommentedMap, CommentedSeq
|
||||
except ModuleNotFoundError:
|
||||
sys.stderr.write(
|
||||
"sort_locales.py requires ruamel.yaml. Install it with:\n"
|
||||
" pip install -r script/i18n/requirements.txt\n"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def make_yaml() -> YAML:
    """Build the ruamel.yaml instance used for both loading and dumping.

    The instance keeps the original quoting, uses a line width of 4096 and
    indents with mapping=2, sequence=4, offset=2.
    """
    parser = YAML()
    parser.indent(mapping=2, sequence=4, offset=2)
    parser.width = 4096
    parser.preserve_quotes = True
    return parser
|
||||
|
||||
|
||||
def _sort_key(key):
|
||||
"""Return the YAML scalar text yamllint compares against.
|
||||
|
||||
ruamel parses unquoted true/false/null as Python objects, but yamllint
|
||||
compares the literal scalar text, so map them back to what gets written.
|
||||
"""
|
||||
if isinstance(key, bool):
|
||||
return "true" if key else "false"
|
||||
if key is None:
|
||||
return "null"
|
||||
return str(key)
|
||||
|
||||
|
||||
def _bare(comment_line: str) -> str:
|
||||
"""'# foo' -> 'foo', '#foo' -> 'foo', '#' -> ''. ruamel's comment APIs re-add '# '."""
|
||||
s = comment_line.lstrip()[1:] # drop leading whitespace and the '#'
|
||||
return s[1:] if s.startswith(" ") else s
|
||||
|
||||
|
||||
def _split_post(value: str):
    """Split a ruamel post-comment token value into ``(eol, own_lines)``.

    ``eol`` is the comment sharing the line with the key's value, or ``None``
    when the token starts with a newline (no same-line comment).
    ``own_lines`` lists the own-line comments that follow, i.e. the ones that
    visually sit above the next key. Segments that are not comments (blank
    lines) are skipped. All returned text is bare (no leading '#').
    """
    inline_first = not value.startswith("\n")
    eol = None
    own_lines = []
    for position, raw in enumerate(value.split("\n")):
        text = raw.strip()
        if text[:1] != "#":
            continue
        bare = _bare(text)
        if inline_first and position == 0:
            eol = bare
        else:
            own_lines.append(bare)
    return eol, own_lines
|
||||
|
||||
|
||||
def reanchor_comments(node, child_indent: int = 0) -> None:
    """Attach own-line comments to the key they precede.

    After this pass the comments travel with that key when keys are
    reordered, so it must run before ``sort_node``. ``child_indent`` is the
    indentation passed to ruamel for the re-attached comments of this node's
    keys; it grows by 2 per nesting level.
    """
    if isinstance(node, CommentedSeq):
        for element in node:
            reanchor_comments(element, child_indent + 2)
        return
    if not isinstance(node, CommentedMap):
        return

    # Handle nested values first, then this mapping's own keys.
    for name in list(node.keys()):
        reanchor_comments(node[name], child_indent + 2)

    # NOTE: a mapping's leading comment (node.ca.comment) is deliberately left
    # alone. For nested mappings ruamel also stores that comment on the
    # parent's `ca.items[key]`, which travels with the key on reorder; moving
    # it here would render it twice. The only unanchored leading comment is
    # the root document header, whose mapping has a single key (`en`) and
    # never reorders.

    ordered = list(node.keys())
    last = len(ordered) - 1
    for position, name in enumerate(ordered):
        entry = node.ca.items.get(name)
        if not entry or entry[2] is None:
            continue
        eol, own_lines = _split_post(entry[2].value)
        if not own_lines or position >= last:
            # nothing to move, or trailing comments at the end of the mapping
            continue
        # Drop the combined token, then restore its two halves separately:
        # the same-line comment on this key, the own-line block on the next.
        node.ca.items[name][2] = None
        if eol is not None:
            node.yaml_add_eol_comment(eol, name)
        node.yaml_set_comment_before_after_key(
            ordered[position + 1], before="\n".join(own_lines), indent=child_indent)
|
||||
|
||||
|
||||
def sort_node(node) -> None:
    """Sort the keys of every mapping under *node*, in place.

    Children are sorted before their parent. Sequences keep their element
    order; only the mappings inside them are sorted. Keys are ordered by
    ``_sort_key``.
    """
    if isinstance(node, CommentedSeq):
        for element in node:
            sort_node(element)
    elif isinstance(node, CommentedMap):
        for name in list(node.keys()):
            sort_node(node[name])
        # Moving each key to the end in sorted order leaves the mapping sorted.
        ordered = sorted(node.keys(), key=_sort_key)
        for name in ordered:
            node.move_to_end(name)
|
||||
|
||||
|
||||
def flatten(node, prefix=()):
    """Yield ``(path, value)`` for every leaf under *node*; order-independent.

    ``path`` is a tuple of ``str(key)`` for mapping levels and ``"[i]"`` for
    sequence positions. Empty mappings and sequences are reported as leaves
    (as ``{}`` / ``[]``), so a key whose value is an empty container still
    appears in the result and cannot disappear unnoticed from a before/after
    comparison.
    """
    if isinstance(node, dict):
        if not node:
            yield prefix, {}
            return
        for key, value in node.items():
            yield from flatten(value, prefix + (str(key),))
    elif isinstance(node, list):
        if not node:
            yield prefix, []
            return
        for index, value in enumerate(node):
            yield from flatten(value, prefix + (f"[{index}]",))
    else:
        yield prefix, node
|
||||
|
||||
|
||||
def _normalize_eol_comment_spacing(text: str) -> str:
    """Ensure at least 2 spaces before end-of-line comments.

    This is what yamllint's comments rule requires. Comment positions are
    taken from a ruamel parse of *text*, so a '#' inside a string value is
    never touched. Lines that are already spaced widely enough, and own-line
    comments, are returned unchanged.
    """
    data = make_yaml().load(text)
    targets = []  # (0-based line, 0-based column of '#')

    def visit(node):
        # Collect the start position of every same-line post comment.
        ca = getattr(node, "ca", None)
        if ca is not None:
            for item in ca.items.values():
                token = item[2] if item else None
                # A token starting with a newline has no same-line comment.
                if token is not None and not token.value.startswith("\n"):
                    targets.append((token.start_mark.line, token.start_mark.column))
        if isinstance(node, dict):
            for value in node.values():
                visit(value)
        elif isinstance(node, list):
            for value in node:
                visit(value)

    if data is not None:
        visit(data)

    lines = text.split("\n")
    for line_no, col in targets:
        if line_no >= len(lines):
            continue
        line = lines[line_no]
        # `col` must be a valid index: `col == len(line)` would raise on line[col].
        if col < 1 or col >= len(line) or line[col] != "#":
            continue
        start = col
        while start > 0 and line[start - 1] == " ":
            start -= 1
        # only an end-of-line comment (content precedes the spaces), under-spaced
        if start > 0 and (col - start) < 2:
            # Exactly two spaces: the minimum the contract above asks for.
            lines[line_no] = line[:start] + "  " + line[col:]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def sort_file(path: str) -> None:
    """Sort the mapping keys of the locale file at *path*, in place.

    Everything before the root ``en:`` line is kept verbatim. The rest is
    loaded, its own-line comments are re-anchored, its keys are sorted, and
    end-of-line comment spacing is normalized. The file is left untouched
    when it has no ``en:`` line, when the body loads as ``None``, or when the
    result is identical to the current contents.

    The file is read and written as UTF-8 regardless of the platform's
    default encoding.

    Raises:
        SystemExit: if the flattened key-paths/values differ after sorting.
    """
    yaml = make_yaml()
    text = Path(path).read_text(encoding="utf-8")

    # Preserve everything up to the root `en:` line verbatim: the license
    # header and any `---` document-start marker. ruamel does not reliably
    # round-trip pre-document leading comments, so we never hand them to it.
    lines = text.splitlines(keepends=True)
    body_start = next(
        (i for i, line in enumerate(lines) if line.rstrip("\n") == "en:"), None)
    if body_start is None:
        return  # no recognizable root mapping; leave untouched
    preamble = "".join(lines[:body_start])
    body = "".join(lines[body_start:])

    data = yaml.load(body)
    if data is None:
        return

    # Safety net: reordering must not add, drop or change any key-path/value.
    before = dict(flatten(data))
    reanchor_comments(data)
    sort_node(data)
    after = dict(flatten(data))
    if before != after:
        raise SystemExit(
            f"{path}: refusing to write — sorting changed content, not just order"
        )

    buffer = io.StringIO()
    yaml.dump(data, buffer)
    body_out = _normalize_eol_comment_spacing(buffer.getvalue())
    body_out = body_out.rstrip("\n") + "\n"  # exactly one trailing newline

    result = preamble + body_out
    if result != text:
        # Skip the write for already-sorted files so they are not touched.
        Path(path).write_text(result, encoding="utf-8")
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Sort every file named in ``argv[1:]`` and return exit status 0.

    ``argv[0]`` (the program name) is ignored.
    """
    targets = argv[1:]
    for target in targets:
        sort_file(target)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: exit with main()'s return value as the status.
    raise SystemExit(main(sys.argv))
|
||||
@@ -0,0 +1,211 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
import sort_locales # noqa: E402
|
||||
|
||||
|
||||
def run_sort(tmp_path, text):
    """Write *text* to ``en.yml`` under *tmp_path*, sort it, return the result."""
    target = tmp_path / "en.yml"
    target.write_text(text)
    sort_locales.sort_file(str(target))
    return target.read_text()
|
||||
|
||||
|
||||
def test_sorts_top_level_and_nested_keys(tmp_path):
    """Keys are sorted at the top level and inside a nested mapping."""
    # `zebra`/`aardvark` are indented under `apple` so the nested case is
    # really exercised (2-space mapping indent, as the sorter emits).
    out = run_sort(tmp_path, (
        "en:\n"
        "  banana: \"B\"\n"
        "  apple:\n"
        "    zebra: 2\n"
        "    aardvark: 1\n"
    ))
    assert out.index("apple:") < out.index("banana:")
    assert out.index("aardvark:") < out.index("zebra:")
    # the nested keys stay inside `apple`, i.e. before the next sibling
    assert out.index("apple:") < out.index("aardvark:")
    assert out.index("zebra:") < out.index("banana:")
|
||||
|
||||
|
||||
def test_own_line_comment_before_non_first_key_moves_with_it(tmp_path):
    """An own-line comment above a non-first key follows that key when sorted."""
    out = run_sort(tmp_path, (
        "en:\n"
        "  zebra: 1\n"
        "  # note about alpha\n"
        "  alpha: 2\n"
    ))
    assert out.index("alpha:") < out.index("zebra:")
    # the comment travels with alpha and stays directly above it; the output
    # uses a 2-space mapping indent, so the key line starts with two spaces
    assert "# note about alpha\n  alpha:" in out
|
||||
|
||||
|
||||
def test_leading_comment_before_first_key_stays_at_block_top(tmp_path):
    """A comment above a mapping's FIRST key stays at the top of the block.

    Documented behavior: such a comment is treated as a block header rather
    than following its original first key. (Dedent/first-key comments are not
    auto-relocated; they're hand-fixed during the one-time sort.)
    """
    src = (
        "en:\n"
        " # block header\n"
        " beta: 2\n"
        " alpha: 1\n"
    )
    out = run_sort(tmp_path, src)
    alpha_at = out.index("alpha:")
    assert alpha_at < out.index("beta:")
    assert out.index("# block header") < alpha_at
|
||||
|
||||
|
||||
def test_sorts_quoted_keys_by_unquoted_value(tmp_path):
    """Quoted keys are ordered by their unquoted text and keep their quotes."""
    src = (
        "en:\n"
        " \"zzz\": 1\n"
        " \"import/jira\": 2\n"
        " aaa: 3\n"
    )
    out = run_sort(tmp_path, src)
    # codepoint order: aaa (97) < import/jira (105) < zzz (122)
    aaa_at = out.index("aaa:")
    jira_at = out.index("import/jira")
    zzz_at = out.index("zzz")
    assert aaa_at < jira_at
    assert jira_at < zzz_at
    # original quoting preserved
    assert '"import/jira"' in out
|
||||
|
||||
|
||||
def test_preserves_block_scalars(tmp_path):
    """A literal block scalar keeps its indicator and its lines after sorting."""
    # The scalar body must be indented deeper than its key, otherwise the
    # fixture is not a block scalar at all.
    out = run_sort(tmp_path, (
        "en:\n"
        "  zebra: \"Z\"\n"
        "  alpha: |\n"
        "    multi\n"
        "    line\n"
    ))
    assert out.index("alpha:") < out.index("zebra:")
    assert "|" in out
    assert "    multi\n    line" in out
|
||||
|
||||
|
||||
def test_multiline_comment_block_moves_with_following_key(tmp_path):
    """A multi-line own-line comment block moves as a unit with the next key."""
    out = run_sort(tmp_path, (
        "en:\n"
        "  zebra: 1\n"
        "  # explains alpha line 1\n"
        "  # explains alpha line 2\n"
        "  alpha: 2\n"
    ))
    # the whole block stays directly above alpha, which sorts first; every
    # line carries the 2-space mapping indent of the output
    assert "# explains alpha line 1\n  # explains alpha line 2\n  alpha:" in out
    assert out.index("alpha:") < out.index("zebra:")
|
||||
|
||||
|
||||
def test_eol_comment_stays_with_its_key_and_own_line_moves(tmp_path):
    """An end-of-line comment stays on its key; the own-line comment moves on."""
    out = run_sort(tmp_path, (
        "en:\n"
        "  zebra: 1  # eol on zebra\n"
        "  # describes alpha\n"
        "  alpha: 2\n"
    ))
    # eol comment remains on zebra's line (two spaces before '#', as the
    # spacing normalization guarantees); own-line comment moves above alpha
    assert "zebra: 1  # eol on zebra" in out
    assert "# describes alpha\n  alpha:" in out
    assert out.index("alpha:") < out.index("zebra:")
|
||||
|
||||
|
||||
import pytest # noqa: E402
|
||||
from ruamel.yaml import YAML # noqa: E402
|
||||
|
||||
|
||||
def _load(text):
    """Parse *text* with a default ruamel ``YAML`` instance."""
    loader = YAML()
    return loader.load(text)
|
||||
|
||||
|
||||
def test_preserves_all_key_paths_and_values(tmp_path):
    """Sorting keeps every key-path and value; only the order may change."""
    # Nested on purpose: the comparison below is about key *paths*, which a
    # flat fixture would not exercise.
    src = (
        "en:\n"
        "  user:\n"
        "    display_format: \"Display format\"\n"
        "    deletion: \"Deletion\"\n"
        "  activities:\n"
        "    index:\n"
        "      title: \"T\"\n"
    )
    out = run_sort(tmp_path, src)
    before = dict(sort_locales.flatten(_load(src)))
    after = dict(sort_locales.flatten(_load(out)))
    assert before == after  # same key-paths and values, order aside
    # guard against a vacuous comparison
    assert ("en", "activities", "index", "title") in after
|
||||
|
||||
|
||||
def test_duplicate_keys_raise(tmp_path):
    """A mapping with a repeated key is rejected instead of being rewritten."""
    # Expect ruamel's dedicated error rather than any Exception, so an
    # unrelated failure (typo, missing file, ...) cannot make this test pass.
    from ruamel.yaml.constructor import DuplicateKeyError

    with pytest.raises(DuplicateKeyError):
        run_sort(tmp_path, (
            "en:\n"
            " alpha: 1\n"
            " alpha: 2\n"
        ))
|
||||
|
||||
|
||||
def _assert_keys_sorted(node):
|
||||
if isinstance(node, dict):
|
||||
keys = [sort_locales._sort_key(k) for k in node.keys()]
|
||||
assert keys == sorted(keys), f"unsorted mapping: {keys}"
|
||||
for value in node.values():
|
||||
_assert_keys_sorted(value)
|
||||
elif isinstance(node, list):
|
||||
for value in node:
|
||||
_assert_keys_sorted(value)
|
||||
|
||||
|
||||
def test_output_is_yamllint_ordered(tmp_path):
    """Every mapping in the output is sorted, including boolean-looking keys."""
    # `alpha` and `bool_keys` hold nested mappings so ordering is checked at
    # more than one level.
    out = run_sort(tmp_path, (
        "en:\n"
        "  gamma: 3\n"
        "  alpha:\n"
        "    delta: 1\n"
        "    beta: 2\n"
        "  bool_keys:\n"
        "    true: t\n"
        "    false: f\n"
    ))
    _assert_keys_sorted(_load(out))
    # boolean keys sort as written: false before true
    assert out.index("false:") < out.index("true:")
|
||||
|
||||
|
||||
def test_preserves_license_header_and_document_marker(tmp_path):
    """The license header and the `---` marker survive sorting verbatim."""
    header = (
        "#-- copyright\n"
        "# OpenProject is an open source project management software.\n"
        "#++\n"
        "\n"
        "---\n"
    )
    body = (
        "en:\n"
        " zebra: 1\n"
        " alpha: 2\n"
    )
    out = run_sort(tmp_path, header + body)
    # header + marker preserved verbatim and still at the very top
    assert out.startswith(header)
    alpha_at = out.index("alpha:")
    assert alpha_at < out.index("zebra:")
|
||||
|
||||
|
||||
def test_single_trailing_newline(tmp_path):
    """Extra trailing blank lines are collapsed to exactly one newline."""
    src = "en:\n b: 1\n a: 2\n\n\n"
    out = run_sort(tmp_path, src)
    trailing_newlines = len(out) - len(out.rstrip("\n"))
    assert trailing_newlines == 1
|
||||
|
||||
|
||||
def test_normalizes_eol_comment_spacing(tmp_path):
    """A one-space end-of-line comment is widened to two spaces."""
    out = run_sort(tmp_path, (
        "en:\n"
        " zebra: \"Z\" # one space before comment\n"
        " alpha: \"a # b is not a comment\"\n"
    ))
    # the real eol comment gets two spaces; the '#' inside the string is untouched
    assert '"Z"  # one space before comment' in out
    assert '"a # b is not a comment"' in out
    assert out.index("alpha:") < out.index("zebra:")
|
||||
|
||||
|
||||
def test_idempotent(tmp_path):
    """Sorting already-sorted output changes nothing."""
    src = (
        "en:\n"
        " gamma: 3\n"
        " # note for alpha\n"
        " alpha: 1\n"
        " beta: 2\n"
    )
    first_pass = run_sort(tmp_path, src)
    second_pass = run_sort(tmp_path, first_pass)
    assert first_pass == second_pass
|
||||
Reference in New Issue
Block a user