fix(documents): strip invisible characters from document titles

Documents created with zero-width Unicode characters (e.g. U+200B)
in their titles become unclickable on the index page, making them
hard to manage or delete.

Introduce RemoveInvisibleCharacters normalizer, replacing the former
RemoveAsciiControlCharacters. It strips both ASCII control characters
and Unicode zero-width characters, with each category defined as a
named constant for clarity. Apply it to Document#title and update
existing callers (Project#identifier, CustomField#name).

Add a shared RSpec example "strips invisible characters" to verify
normalization consistently across all three models.
This commit is contained in:
Kabiru Mwenja
2026-03-31 18:18:41 +03:00
parent a5842882db
commit 5e3dd6a1d1
11 changed files with 162 additions and 83 deletions
+1 -1
View File
@@ -32,7 +32,7 @@ class CustomField < ApplicationRecord
include CustomField::OrderStatements
include CustomField::CalculatedValue
normalizes :name, with: OpenProject::RemoveAsciiControlCharacters
normalizes :name, with: OpenProject::RemoveInvisibleCharacters
has_many :custom_values, dependent: :delete_all
# WARNING: the inverse_of option is also required in order
+1 -1
View File
@@ -38,7 +38,7 @@ module Projects::Identifier
included do
extend FriendlyId
normalizes :identifier, with: OpenProject::RemoveAsciiControlCharacters
normalizes :identifier, with: OpenProject::RemoveInvisibleCharacters
### ID generators
# There are two supported formats:
@@ -1,10 +0,0 @@
# frozen_string_literal: true
module OpenProject
# Strips ASCII control characters (0x000x1F, 0x7F) from a string.
# Designed for use with ActiveRecord's `normalizes` API:
#
# normalizes :name, with: OpenProject::RemoveAsciiControlCharacters
#
RemoveAsciiControlCharacters = ->(value) { value.is_a?(String) ? value.gsub(/[\x00-\x1F\x7F]/, "") : value }
end
@@ -0,0 +1,45 @@
# frozen_string_literal: true
#-- copyright
# OpenProject is an open source project management software.
# Copyright (C) the OpenProject GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
# Copyright (C) 2006-2013 Jean-Philippe Lang
# Copyright (C) 2010-2013 the ChiliProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# See COPYRIGHT and LICENSE files for more details.
#++
module OpenProject
# Strips invisible characters from a string: ASCII control characters
# and Unicode zero-width characters. Designed for use with
# ActiveRecord's `normalizes` API:
#
# normalizes :name, with: OpenProject::RemoveInvisibleCharacters
#
ASCII_CONTROL_CHARACTERS = /[\x00-\x1F\x7F]/
ZERO_WIDTH_CHARACTERS = /[\u200B\u200C\u200D\uFEFF\u2060]/
INVISIBLE_CHARACTERS = Regexp.union(ASCII_CONTROL_CHARACTERS, ZERO_WIDTH_CHARACTERS)
private_constant :ASCII_CONTROL_CHARACTERS, :ZERO_WIDTH_CHARACTERS, :INVISIBLE_CHARACTERS
RemoveInvisibleCharacters = ->(value) { value.is_a?(String) ? value.gsub(INVISIBLE_CHARACTERS, "") : value }
end
+2
View File
@@ -52,6 +52,8 @@ class Document < ApplicationRecord
references: :projects,
date_column: "#{table_name}.created_at"
normalizes :title, with: OpenProject::RemoveInvisibleCharacters
validates :title, presence: true, length: { maximum: 255 }
scope :visible, ->(user = User.current) {
@@ -55,6 +55,10 @@ RSpec.describe Document do
it { is_expected.to belong_to(:type).class_name("DocumentType").optional }
end
describe "title normalization" do
it_behaves_like "strips invisible characters", :title
end
describe "Validations" do
it { is_expected.to validate_presence_of :title }
it { is_expected.to validate_length_of(:title).is_at_most(255) }
@@ -1,61 +0,0 @@
# frozen_string_literal: true
require "spec_helper"
RSpec.describe OpenProject::RemoveAsciiControlCharacters do
subject(:call) { described_class.call(value) }
context "with a clean string" do
let(:value) { "Hello World" }
it { is_expected.to eq("Hello World") }
end
context "with newline and tab characters" do
let(:value) { "Hello\n\tWorld\r\n" }
it { is_expected.to eq("HelloWorld") }
end
context "with null byte" do
let(:value) { "Hello\x00World" }
it { is_expected.to eq("HelloWorld") }
end
context "with escape and delete characters" do
let(:value) { "Hello\x1B\x7FWorld" }
it { is_expected.to eq("HelloWorld") }
end
context "with a mix of control characters" do
let(:value) { "\x01He\x02llo\x03 \x04Wo\x05rld\x06" }
it { is_expected.to eq("Hello World") }
end
context "with Unicode characters (preserved)" do
let(:value) { "Héllo Wörld 日本語" }
it { is_expected.to eq("Héllo Wörld 日本語") }
end
context "with spaces and quotes (preserved)" do
let(:value) { %{It's a "test" value} }
it { is_expected.to eq(%{It's a "test" value}) }
end
context "with a non-string value" do
let(:value) { 42 }
it { is_expected.to eq(42) }
end
context "with nil" do
let(:value) { nil }
it { is_expected.to be_nil }
end
end
@@ -0,0 +1,101 @@
# frozen_string_literal: true
#-- copyright
# OpenProject is an open source project management software.
# Copyright (C) the OpenProject GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
# Copyright (C) 2006-2013 Jean-Philippe Lang
# Copyright (C) 2010-2013 the ChiliProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# See COPYRIGHT and LICENSE files for more details.
#++
require "spec_helper"
RSpec.describe OpenProject::RemoveInvisibleCharacters do
subject(:call) { described_class.call(value) }
context "with a clean string" do
let(:value) { "Hello World" }
it { is_expected.to eq("Hello World") }
end
context "with newline and tab characters" do
let(:value) { "Hello\n\tWorld\r\n" }
it { is_expected.to eq("HelloWorld") }
end
context "with null byte" do
let(:value) { "Hello\x00World" }
it { is_expected.to eq("HelloWorld") }
end
context "with escape and delete characters" do
let(:value) { "Hello\x1B\x7FWorld" }
it { is_expected.to eq("HelloWorld") }
end
context "with a mix of control characters" do
let(:value) { "\x01He\x02llo\x03 \x04Wo\x05rld\x06" }
it { is_expected.to eq("Hello World") }
end
context "with zero-width space (U+200B)" do
let(:value) { "Hello\u200BWorld" }
it { is_expected.to eq("HelloWorld") }
end
context "with multiple zero-width characters" do
let(:value) { "Hello\u200B\u200C\u200D\uFEFF\u2060World" }
it { is_expected.to eq("HelloWorld") }
end
context "with Unicode characters (preserved)" do
let(:value) { "Héllo Wörld 日本語" }
it { is_expected.to eq("Héllo Wörld 日本語") }
end
context "with spaces and quotes (preserved)" do
let(:value) { %{It's a "test" value} }
it { is_expected.to eq(%{It's a "test" value}) }
end
context "with a non-string value" do
let(:value) { 42 }
it { is_expected.to eq(42) }
end
context "with nil" do
let(:value) { nil }
it { is_expected.to be_nil }
end
end
+1 -9
View File
@@ -44,15 +44,7 @@ RSpec.describe CustomField do
it { is_expected.to validate_presence_of(:name) }
it { is_expected.to validate_length_of(:name).is_at_most(256) }
it "strips ASCII control characters on assignment" do
cf = build(:custom_field, name: "Test\nField\x00Name\t!")
expect(cf.name).to eq("TestFieldName!")
end
it "preserves Unicode and normal characters" do
cf = build(:custom_field, name: "Héllo Wörld 日本語")
expect(cf.name).to eq("Héllo Wörld 日本語")
end
it_behaves_like "strips invisible characters", :name
describe "uniqueness" do
describe "WHEN value, locale and type are identical" do
+1 -1
View File
@@ -34,7 +34,7 @@ RSpec.describe Projects::Identifier do
describe "identifier normalization" do
subject { Project.new }
it { is_expected.to normalize(:identifier).from("my\n\x00project\t").to("myproject") }
it_behaves_like "strips invisible characters", :identifier
end
describe "url identifier" do
@@ -0,0 +1,6 @@
# frozen_string_literal: true
RSpec.shared_examples "strips invisible characters" do |attribute|
it { is_expected.to normalize(attribute).from("hello\n\x00world\t").to("helloworld") }
it { is_expected.to normalize(attribute).from("hello\u200Bworld\u200C").to("helloworld") }
end