[67642] Avoid error when attachment is not uploaded yet

https://community.openproject.org/wp/67642

Attachments::ExtractFulltextJob is run twice:
- one time on creation
- one time when the direct upload is completed

When it runs on creation, and the attachments are stored on S3, the
attachment is in 'prepared' status and is not complete yet.

Due to a bug in carrierwave which is fixed since June 2023 (see
https://github.com/carrierwaveuploader/carrierwave/issues/2524), the
`#local_file` method raises the error "NoMethodError: undefined method
'body' for nil:NilClass". There is a separate issue for upgrading this
dependency one day: https://community.openproject.org/wp/67626.

The fix is to call `#local_file` only if the attachment is readable.

Additionnally:
- error handling has been updated to raise the error instead of
  swallowing it silently, so well have proper reporting in AppSignal
  next time.
- when a custom S3 endpoint is used (for local testing with minio for
  instance), this custom endpoint is added to the CSP.

Co-authored-by: Jan Sandbrink <j.sandbrink@openproject.com>
This commit is contained in:
Christophe Bliard
2025-09-23 17:58:43 +02:00
parent fa650bc05b
commit a69859f0ec
5 changed files with 59 additions and 30 deletions
@@ -53,17 +53,17 @@ module Attachments
def init
carrierwave_uploader = @attachment.file
@file = carrierwave_uploader.local_file
@filename = carrierwave_uploader.file.filename
if @attachment.readable?
@file = carrierwave_uploader.local_file
resolver = Plaintext::Resolver.new(@file, @attachment.content_type)
@text = resolver.text
end
rescue StandardError => e
Rails.logger.error(
"Failed to extract plaintext from file #{@attachment&.id} (On domain #{Setting.host_name}): #{e}: #{e.message}"
)
log_error("Failed to extract plaintext for attachment ##{@attachment&.id}", e)
update # Still update tsv values
raise
end
def update
@@ -76,9 +76,8 @@ module Attachments
@language,
OpenProject::FullTextSearch.normalize_filename(@filename)])
rescue StandardError => e
Rails.logger.error(
"Failed to update TSV values for attachment #{@attachment&.id} (On domain #{Setting.host_name}): #{e.message[0..499]}[...]"
)
log_error("Failed to update TSV values for attachment ##{@attachment&.id}", e)
raise
end
def find_attachment(id)
@@ -92,5 +91,10 @@ module Attachments
def delete_file?
remote_file? && @file
end
def log_error(message, cause)
message += " (on domain #{Setting.host_name})"
Rails.logger.error("#{message}: #{cause.message.truncate(500, omission: '[...]')}")
end
end
end
@@ -73,7 +73,7 @@ Rails.application.config.after_initialize do
# Allow connections to S3 for BIM
if OpenProject::Configuration.fog_directory.present?
connect_src += [
"#{OpenProject::Configuration.fog_directory}.s3-#{OpenProject::Configuration.fog_credentials[:region]}.amazonaws.com"
OpenProject::Configuration.fog_s3_upload_host
]
end
@@ -131,13 +131,17 @@ module OpenProject
end
def fog_credentials
(Hash(self["fog"])["credentials"] || {}).map { |key, value| [key.to_sym, value] }.to_h
(Hash(self["fog"])["credentials"] || {}).symbolize_keys
end
def fog_directory
Hash(self["fog"])["directory"]
end
def fog_s3_upload_host
fog_credentials[:endpoint].presence || "#{fog_directory}.s3-#{fog_credentials[:region]}.amazonaws.com"
end
def file_uploader
available_file_uploaders[OpenProject::Configuration.attachments_storage.to_sym]
end
+4
View File
@@ -0,0 +1,4 @@
inherit_from: ../.rubocop.yml
Style/RescueModifier:
Enabled: false
@@ -32,8 +32,6 @@ require "spec_helper"
RSpec.describe Attachments::ExtractFulltextJob, type: :job do
subject(:extracted_attributes) do
perform_enqueued_jobs
attachment.reload
Attachment.connection.select_one <<~SQL.squish
@@ -51,10 +49,6 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do
let(:text) { "lorem ipsum" }
let(:attachment) do
create(:attachment).tap do |attachment|
expect(Attachments::ExtractFulltextJob)
.to have_been_enqueued
.with(attachment.id)
allow(Attachment)
.to receive(:find_by)
.with(id: attachment.id)
@@ -62,17 +56,33 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do
end
end
it "is enqueued on Attachment creation" do
attachment = create(:attachment)
expect(described_class)
.to have_been_enqueued
.with(attachment.id)
end
it "is not enqueued on Attachment update" do
attachment = create(:attachment)
clear_enqueued_jobs
attachment.touch
expect(described_class)
.not_to have_been_enqueued
end
context "with successful text extraction" do
before do
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return(text)
end
context "attachment is readable" do
context "when the attachment is readable" do
before do
allow(attachment).to receive(:readable?).and_return(true)
end
it "updates the attachment's DB record with fulltext, fulltext_tsv, and file_tsv" do
perform_enqueued_jobs
expect(extracted_attributes["fulltext"]).to eq text
expect(extracted_attributes["fulltext_tsv"].size).to be > 0
expect(extracted_attributes["file_tsv"].size).to be > 0
@@ -83,6 +93,7 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do
# include_examples 'no fulltext but file name saved as TSV'
it "updates the attachment's DB record with file_tsv" do
perform_enqueued_jobs
expect(extracted_attributes["fulltext"]).to be_blank
expect(extracted_attributes["fulltext_tsv"]).to be_blank
expect(extracted_attributes["file_tsv"].size).to be > 0
@@ -91,39 +102,45 @@ RSpec.describe Attachments::ExtractFulltextJob, type: :job do
end
end
shared_examples "only file name indexed" do
context "with the file not readable" do
before do
allow(attachment).to receive(:readable?).and_return(false)
end
it "updates the attachment's DB record with file_tsv" do
perform_enqueued_jobs
expect(extracted_attributes["fulltext"]).to be_blank
expect(extracted_attributes["fulltext_tsv"]).to be_blank
expect(extracted_attributes["file_tsv"].size).to be > 0
end
end
context "with the file not readable" do
before do
allow(attachment).to receive(:readable?).and_return(false)
end
include_examples "only file name indexed"
end
context "with exception in extraction" do
context "with exception during extraction" do
let(:exception_message) { "boom-internal-error" }
let(:logger) { Rails.logger }
before do
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_raise(exception_message)
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_raise(exception_message) # rubocop:disable RSpec/AnyInstance
allow(logger).to receive(:error)
allow(attachment).to receive(:readable?).and_return(true)
end
it "logs the error" do
extracted_attributes
it "raises the exception to update Job's internal state" do
expect { perform_enqueued_jobs }.to raise_error(exception_message)
end
it "logs the exception" do
perform_enqueued_jobs rescue nil
expect(logger).to have_received(:error).with(/boom-internal-error/)
end
include_examples "only file name indexed"
it "updates the attachment's DB record with file_tsv from the filename" do
perform_enqueued_jobs rescue nil
expect(extracted_attributes["fulltext"]).to be_blank
expect(extracted_attributes["fulltext_tsv"]).to be_blank
expect(extracted_attributes["file_tsv"].size).to be > 0
end
end
end