diff --git a/app/models/custom_field.rb b/app/models/custom_field.rb index fc5136fa365..7e8a58ca86c 100644 --- a/app/models/custom_field.rb +++ b/app/models/custom_field.rb @@ -32,7 +32,7 @@ class CustomField < ApplicationRecord include CustomField::OrderStatements include CustomField::CalculatedValue - normalizes :name, with: OpenProject::RemoveAsciiControlCharacters + normalizes :name, with: OpenProject::RemoveInvisibleCharacters has_many :custom_values, dependent: :delete_all # WARNING: the inverse_of option is also required in order diff --git a/app/models/projects/identifier.rb b/app/models/projects/identifier.rb index 48728006ed3..b7fa1ab2ab0 100644 --- a/app/models/projects/identifier.rb +++ b/app/models/projects/identifier.rb @@ -38,7 +38,7 @@ module Projects::Identifier included do extend FriendlyId - normalizes :identifier, with: OpenProject::RemoveAsciiControlCharacters + normalizes :identifier, with: OpenProject::RemoveInvisibleCharacters ### ID generators # There are two supported formats: diff --git a/lib/open_project/remove_ascii_control_characters.rb b/lib/open_project/remove_ascii_control_characters.rb deleted file mode 100644 index 0e189ae6b1d..00000000000 --- a/lib/open_project/remove_ascii_control_characters.rb +++ /dev/null @@ -1,10 +0,0 @@ -# frozen_string_literal: true - -module OpenProject - # Strips ASCII control characters (0x00–0x1F, 0x7F) from a string. - # Designed for use with ActiveRecord's `normalizes` API: - # - # normalizes :name, with: OpenProject::RemoveAsciiControlCharacters - # - RemoveAsciiControlCharacters = ->(value) { value.is_a?(String) ? value.gsub(/[\x00-\x1F\x7F]/, "") : value } -end diff --git a/lib/open_project/remove_invisible_characters.rb b/lib/open_project/remove_invisible_characters.rb new file mode 100644 index 00000000000..905e432f758 --- /dev/null +++ b/lib/open_project/remove_invisible_characters.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +#-- copyright +# OpenProject is an open source project management software. +# Copyright (C) the OpenProject GmbH +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License version 3. +# +# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows: +# Copyright (C) 2006-2013 Jean-Philippe Lang +# Copyright (C) 2010-2013 the ChiliProject Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# See COPYRIGHT and LICENSE files for more details. +#++ + +module OpenProject + # Strips invisible characters from a string: ASCII control characters + # and Unicode zero-width characters. Designed for use with + # ActiveRecord's `normalizes` API: + # + # normalizes :name, with: OpenProject::RemoveInvisibleCharacters + # + ASCII_CONTROL_CHARACTERS = /[\x00-\x1F\x7F]/ + ZERO_WIDTH_CHARACTERS = /[\u200B\u200C\u200D\uFEFF\u2060]/ + INVISIBLE_CHARACTERS = Regexp.union(ASCII_CONTROL_CHARACTERS, ZERO_WIDTH_CHARACTERS) + + private_constant :ASCII_CONTROL_CHARACTERS, :ZERO_WIDTH_CHARACTERS, :INVISIBLE_CHARACTERS + + RemoveInvisibleCharacters = ->(value) { value.is_a?(String) ? value.gsub(INVISIBLE_CHARACTERS, "") : value } +end diff --git a/modules/documents/app/models/document.rb b/modules/documents/app/models/document.rb index 093e2b0f80a..ecb12933b31 100644 --- a/modules/documents/app/models/document.rb +++ b/modules/documents/app/models/document.rb @@ -52,6 +52,8 @@ class Document < ApplicationRecord references: :projects, date_column: "#{table_name}.created_at" + normalizes :title, with: OpenProject::RemoveInvisibleCharacters + validates :title, presence: true, length: { maximum: 255 } scope :visible, ->(user = User.current) { diff --git a/modules/documents/spec/models/document_spec.rb b/modules/documents/spec/models/document_spec.rb index 8126c6eada4..7b81a838b2d 100644 --- a/modules/documents/spec/models/document_spec.rb +++ b/modules/documents/spec/models/document_spec.rb @@ -55,6 +55,10 @@ RSpec.describe Document do it { is_expected.to belong_to(:type).class_name("DocumentType").optional } end + describe "title normalization" do + it_behaves_like "strips invisible characters", :title + end + describe "Validations" do it { is_expected.to validate_presence_of :title } it { is_expected.to validate_length_of(:title).is_at_most(255) } diff --git a/spec/lib/open_project/remove_ascii_control_characters_spec.rb b/spec/lib/open_project/remove_ascii_control_characters_spec.rb deleted file mode 100644 index d7394c18b55..00000000000 --- a/spec/lib/open_project/remove_ascii_control_characters_spec.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -require "spec_helper" - -RSpec.describe OpenProject::RemoveAsciiControlCharacters do - subject(:call) { described_class.call(value) } - - context "with a clean string" do - let(:value) { "Hello World" } - - it { is_expected.to eq("Hello World") } - end - - context "with newline and tab characters" do - let(:value) { "Hello\n\tWorld\r\n" } - - it { is_expected.to eq("HelloWorld") } - end - - context "with null byte" do - let(:value) { "Hello\x00World" } - - it { is_expected.to eq("HelloWorld") } - end - - context "with escape and delete characters" do - let(:value) { "Hello\x1B\x7FWorld" } - - it { is_expected.to eq("HelloWorld") } - end - - context "with a mix of control characters" do - let(:value) { "\x01He\x02llo\x03 \x04Wo\x05rld\x06" } - - it { is_expected.to eq("Hello World") } - end - - context "with Unicode characters (preserved)" do - let(:value) { "Héllo Wörld 日本語" } - - it { is_expected.to eq("Héllo Wörld 日本語") } - end - - context "with spaces and quotes (preserved)" do - let(:value) { %{It's a "test" value} } - - it { is_expected.to eq(%{It's a "test" value}) } - end - - context "with a non-string value" do - let(:value) { 42 } - - it { is_expected.to eq(42) } - end - - context "with nil" do - let(:value) { nil } - - it { is_expected.to be_nil } - end -end diff --git a/spec/lib/open_project/remove_invisible_characters_spec.rb b/spec/lib/open_project/remove_invisible_characters_spec.rb new file mode 100644 index 00000000000..cbe5f3a6e30 --- /dev/null +++ b/spec/lib/open_project/remove_invisible_characters_spec.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +#-- copyright +# OpenProject is an open source project management software. +# Copyright (C) the OpenProject GmbH +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License version 3. +# +# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows: +# Copyright (C) 2006-2013 Jean-Philippe Lang +# Copyright (C) 2010-2013 the ChiliProject Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# See COPYRIGHT and LICENSE files for more details. +#++ + +require "spec_helper" + +RSpec.describe OpenProject::RemoveInvisibleCharacters do + subject(:call) { described_class.call(value) } + + context "with a clean string" do + let(:value) { "Hello World" } + + it { is_expected.to eq("Hello World") } + end + + context "with newline and tab characters" do + let(:value) { "Hello\n\tWorld\r\n" } + + it { is_expected.to eq("HelloWorld") } + end + + context "with null byte" do + let(:value) { "Hello\x00World" } + + it { is_expected.to eq("HelloWorld") } + end + + context "with escape and delete characters" do + let(:value) { "Hello\x1B\x7FWorld" } + + it { is_expected.to eq("HelloWorld") } + end + + context "with a mix of control characters" do + let(:value) { "\x01He\x02llo\x03 \x04Wo\x05rld\x06" } + + it { is_expected.to eq("Hello World") } + end + + context "with zero-width space (U+200B)" do + let(:value) { "Hello\u200BWorld" } + + it { is_expected.to eq("HelloWorld") } + end + + context "with multiple zero-width characters" do + let(:value) { "Hello\u200B\u200C\u200D\uFEFF\u2060World" } + + it { is_expected.to eq("HelloWorld") } + end + + context "with Unicode characters (preserved)" do + let(:value) { "Héllo Wörld 日本語" } + + it { is_expected.to eq("Héllo Wörld 日本語") } + end + + context "with spaces and quotes (preserved)" do + let(:value) { %{It's a "test" value} } + + it { is_expected.to eq(%{It's a "test" value}) } + end + + context "with a non-string value" do + let(:value) { 42 } + + it { is_expected.to eq(42) } + end + + context "with nil" do + let(:value) { nil } + + it { is_expected.to be_nil } + end +end diff --git a/spec/models/custom_field_spec.rb b/spec/models/custom_field_spec.rb index 96b30f29686..8a9c0177082 100644 --- a/spec/models/custom_field_spec.rb +++ b/spec/models/custom_field_spec.rb @@ -44,15 +44,7 @@ RSpec.describe CustomField do it { is_expected.to validate_presence_of(:name) } it { is_expected.to validate_length_of(:name).is_at_most(256) } - it "strips ASCII control characters on assignment" do - cf = build(:custom_field, name: "Test\nField\x00Name\t!") - expect(cf.name).to eq("TestFieldName!") - end - - it "preserves Unicode and normal characters" do - cf = build(:custom_field, name: "Héllo Wörld 日本語") - expect(cf.name).to eq("Héllo Wörld 日本語") - end + it_behaves_like "strips invisible characters", :name describe "uniqueness" do describe "WHEN value, locale and type are identical" do diff --git a/spec/models/projects/identifier_spec.rb b/spec/models/projects/identifier_spec.rb index d2ddd779436..97cbce51460 100644 --- a/spec/models/projects/identifier_spec.rb +++ b/spec/models/projects/identifier_spec.rb @@ -34,7 +34,7 @@ RSpec.describe Projects::Identifier do describe "identifier normalization" do subject { Project.new } - it { is_expected.to normalize(:identifier).from("my\n\x00project\t").to("myproject") } + it_behaves_like "strips invisible characters", :identifier end describe "url identifier" do diff --git a/spec/support/shared/strip_invisible_characters.rb b/spec/support/shared/strip_invisible_characters.rb new file mode 100644 index 00000000000..a0e9d0dfbee --- /dev/null +++ b/spec/support/shared/strip_invisible_characters.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +RSpec.shared_examples "strips invisible characters" do |attribute| + it { is_expected.to normalize(attribute).from("hello\n\x00world\t").to("helloworld") } + it { is_expected.to normalize(attribute).from("hello\u200Bworld\u200C").to("helloworld") } +end