diff --git a/lib/open_project/patches/plaintext_resolver.rb b/lib/open_project/patches/plaintext_resolver.rb new file mode 100644 index 00000000000..fc386e4647e --- /dev/null +++ b/lib/open_project/patches/plaintext_resolver.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +#-- copyright +# OpenProject is an open source project management software. +# Copyright (C) the OpenProject GmbH +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License version 3. +# +# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows: +# Copyright (C) 2006-2013 Jean-Philippe Lang +# Copyright (C) 2010-2013 the ChiliProject Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# See COPYRIGHT and LICENSE files for more details. +#++ + +module OpenProject::Patches::PlaintextResolverPatch + # identical to the original implementation but with a mutable copy of the text + # if it's frozen. + def text + if handler = find_handler and + text = handler.text(@file, max_size: max_plaintext_bytes) + + # Handle frozen strings by creating a mutable copy + text = text.dup if text.frozen? + text.gsub!(/\s+/m, " ") + text.strip! + text.mb_chars.compose.limit(max_plaintext_bytes).to_s + end + end +end + +Plaintext::Resolver.prepend(OpenProject::Patches::PlaintextResolverPatch) diff --git a/spec/workers/attachments/extract_fulltext_job_spec.rb b/spec/workers/attachments/extract_fulltext_job_spec.rb index cd34f6ec327..3f9875137a7 100644 --- a/spec/workers/attachments/extract_fulltext_job_spec.rb +++ b/spec/workers/attachments/extract_fulltext_job_spec.rb @@ -48,11 +48,13 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do let(:text) { "lorem ipsum" } let(:attachment) do - create(:attachment).tap do |attachment| - allow(Attachment) - .to receive(:find_by) - .with(id: attachment.id) - .and_return(attachment) + create(:attachment) + end + let(:plaintext_file_handler) do + Plaintext::Resolver.file_handlers.find { |h| h.accept? attachment.content_type }.tap do |plaintext_file_handler| + if plaintext_file_handler.nil? + fail "Plaintext::FileHandler not found for content type #{attachment.content_type}" + end end end @@ -73,7 +75,7 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do context "with successful text extraction" do before do - allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return(text) + allow(plaintext_file_handler).to receive(:text).and_return(text) end context "when the attachment is readable" do @@ -102,8 +104,26 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do end end + context "when the plaintext handler returns a frozen string (Bug #68047)" do + let(:frozen_text) { "frozen string".freeze } # rubocop:disable Style/RedundantFreeze + + before do + allow(plaintext_file_handler).to receive(:text).and_return(frozen_text) + end + + it "updates the attachment's DB record with fulltext, fulltext_tsv, and file_tsv" do + perform_enqueued_jobs + expect(extracted_attributes["fulltext"]).to eq frozen_text + expect(extracted_attributes["fulltext_tsv"].size).to be > 0 + expect(extracted_attributes["file_tsv"].size).to be > 0 + end + end + context "with the file not readable" do before do + allow(Attachment) + .to receive(:find_by).with(id: attachment.id) + .and_return(attachment) allow(attachment).to receive(:readable?).and_return(false) end @@ -120,7 +140,7 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do let(:logger) { Rails.logger } before do - allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_raise(exception_message) # rubocop:disable RSpec/AnyInstance + allow(plaintext_file_handler).to receive(:text).and_raise(exception_message) allow(logger).to receive(:error)