Extract and use charset to properly serve inline text attachments (#23432)

* Extract and use charset to properly encode attachments

* Add the content type for external URLs

* Be more cautious when parsing charset from `file`
This commit is contained in:
Oliver Günther
2026-05-29 10:51:37 +02:00
committed by GitHub
15 changed files with 291 additions and 62 deletions
@@ -36,6 +36,7 @@ module Attachments
attribute :digest
attribute :description
attribute :content_type
attribute :charset
attribute :container
attribute :container_type
attribute :author
+28 -2
View File
@@ -93,7 +93,9 @@ class Attachment < ApplicationRecord
# specifically when using S3 for attachments. In the case of S3 the file name for the downloaded
# file will still be correct as it's part of the URL before the query.
def external_url_options(expires_in: nil)
{ content_disposition: content_disposition(include_filename: false), expires_in: }
{ content_disposition: content_disposition(include_filename: false),
content_type: served_content_type,
expires_in: }
end
def external_storage?
@@ -119,6 +121,30 @@ class Attachment < ApplicationRecord
end
end
# Returns the Content-Type to use when serving this file inline in a browser.
# Text files are normalised to text/plain (prevents script execution) with an
# explicit charset. Non-inlineable files get application/octet-stream so the
# browser is forced to download them.
def served_content_type
if is_text?
"text/plain; charset=#{charset.presence || Setting.attachment_default_charset}"
elsif inlineable?
content_type
else
"application/octet-stream"
end
end
# Returns the content type to use when serving the file to a browser.
# For text files, ensures a charset is always present so browsers don't
# fall back to ISO-8859-1. Preserves the real MIME subtype (e.g. text/x-ruby)
# unlike served_content_type which normalises to text/plain for security.
def serving_content_type
return content_type unless is_text?
"#{content_type}; charset=#{charset.presence || Setting.attachment_default_charset}"
end
def visible?(user = User.current)
allowed_or_author?(user) do
container.attachments_visible?(user)
@@ -227,7 +253,7 @@ class Attachment < ApplicationRecord
end
def set_content_type(file)
self.content_type = self.class.content_type_for(file.path)
self.content_type, self.charset = OpenProject::ContentTypeDetector.new(file.path).detect_with_charset
end
def set_digest(file)
+12 -8
View File
@@ -74,6 +74,7 @@ class FogFileUploader < CarrierWave::Uploader::Base
#
# @param options [Hash] Options hash.
# @option options [String] :content_disposition Pass this content disposition to S3 so that it serves the file with it.
# @option options [String] :content_type Pass this content type to S3 so that it serves the file with it.
# @option options [DateTime] :expires_at Date at which the link should expire (default: now + 5 minutes)
# @option options [ActiveSupport::Duration] :expires_in Duration in which the link should expire.
#
@@ -82,6 +83,7 @@ class FogFileUploader < CarrierWave::Uploader::Base
url_options = {}
set_content_disposition!(url_options, options:)
set_content_type!(url_options, options:)
set_expires_at!(url_options, options:)
remote_file.url url_options
@@ -103,15 +105,17 @@ class FogFileUploader < CarrierWave::Uploader::Base
private
def set_content_disposition!(url_options, options:)
if options[:content_disposition].present?
url_options[:query] = {
# Passing this option to S3 will make it serve the file with the
# respective content disposition. Without it no content disposition
# header is sent. This only works for S3 but we don't support
# anything else anyway (see carrierwave.rb).
"response-content-disposition" => options[:content_disposition]
}
return if options[:content_disposition].blank?
(url_options[:query] ||= {})["response-content-disposition"] = options[:content_disposition]
end
def set_content_type!(url_options, options:)
return if options[:content_type].blank?
# Like the content disposition above, this makes S3 serve the file with the
# given Content-Type, overriding the stored object type.
(url_options[:query] ||= {})["response-content-type"] = options[:content_type]
end
def set_expires_at!(url_options, options:)
+5
View File
@@ -81,6 +81,11 @@ module Settings
organization_name: {
default: "My Organization"
},
attachment_default_charset: {
description: "Fallback charset used when serving text attachments whose encoding was not detected on upload",
format: :string,
default: "utf-8"
},
attachment_max_size: {
default: 5120
},
+1
View File
@@ -1808,6 +1808,7 @@ en:
attachment:
attachment_content: "Attachment content"
attachment_file_name: "Attachment file name"
charset: "Character set"
content_type: "Content-type"
downloads: "Downloads"
file: "File"
@@ -0,0 +1,35 @@
# frozen_string_literal: true
#-- copyright
# OpenProject is an open source project management software.
# Copyright (C) the OpenProject GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
# Copyright (C) 2006-2013 Jean-Philippe Lang
# Copyright (C) 2010-2013 the ChiliProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# See COPYRIGHT and LICENSE files for more details.
#++
class AddCharsetToAttachments < ActiveRecord::Migration[8.0]
def change
add_column :attachments, :charset, :text
end
end
+1 -10
View File
@@ -100,16 +100,7 @@ module API
end
def attachment_content_type(attachment)
if attachment.is_text?
# Even if the text mime type might differ, always output plain text
# so this doesn't get interpreted as e.g., a script or html file
"text/plain"
elsif attachment.inlineable?
attachment.content_type
else
# For security reasons, mark all non-inlinable files as an octet-stream first
"application/octet-stream"
end
attachment.served_content_type
end
def set_cache_headers
+14 -17
View File
@@ -92,19 +92,19 @@ module OpenProject
@filename = filename
end
# Returns a String describing the file's content type
def detect
if blank_name?
SENSIBLE_DEFAULT
elsif empty_file?
EMPTY_TYPE
elsif calculated_type_matches.any?
calculated_type_matches.first
else
type_from_file_command || SENSIBLE_DEFAULT
end.to_s
# Returns [mime_type, charset_or_nil], running the file command once.
def detect_with_charset
return [SENSIBLE_DEFAULT, nil] if blank_name?
return [EMPTY_TYPE, nil] if empty_file?
raw_mime, charset = FileCommandContentTypeDetector.new(@filename).detect
[resolve_mime(raw_mime), charset]
end
# Detecting only the mime type is effectively the
# first argument of +detect_with_charset+
def detect = detect_with_charset.first
private
def empty_file?
@@ -121,12 +121,9 @@ module OpenProject
MIME::Types.type_for(@filename).map(&:content_type)
end
def calculated_type_matches
possible_types.select { |content_type| content_type == type_from_file_command }
end
def type_from_file_command
@type_from_file_command ||= FileCommandContentTypeDetector.new(@filename).detect
def resolve_mime(raw_mime)
matches = possible_types.select { |ct| ct == raw_mime }
(matches.first || raw_mime || SENSIBLE_DEFAULT).to_s
end
end
end
@@ -69,23 +69,34 @@ module OpenProject
@filename = filename
end
# Returns [mime_type, charset_or_nil], e.g.:
# ["text/plain", "utf-8"]
# ["image/png", nil]
def detect
type_from_file_command
@detect ||= parse_file_command
end
private
def type_from_file_command
def parse_file_command
# On BSDs, `file` doesn't give a result code of 1 if the file doesn't exist.
type, status = Open3.capture2("file", "-b", "--mime", "--", @filename)
return [SENSIBLE_DEFAULT, nil] if type.nil? || status.to_i > 0 || type.match(/\(.*?\)/)
if type.nil? || status.to_i > 0 || type.match(/\(.*?\)/)
type = SENSIBLE_DEFAULT
end
type.split(/[:;\s]+/)[0]
extract_mime_and_charset(type.strip)
rescue StandardError => e
Rails.logger.info { "Failed to get mime type from #{@filename}: #{e} #{e.message}" }
SENSIBLE_DEFAULT
[SENSIBLE_DEFAULT, nil]
end
def extract_mime_and_charset(type)
parts = type.split(";").map(&:strip)
mime = parts.first
charset = parts.drop(1)
.filter_map { |p| p.match(/\Acharset=([^\s;]+)\z/)&.[](1) }
.first
charset = nil if charset == "binary"
[mime, charset]
end
end
end
+1
View File
@@ -0,0 +1 @@
UTF-8 encoded text with non-ASCII characters: äöü €
@@ -90,7 +90,7 @@ RSpec.describe OpenProject::ContentTypeDetector do
File.open(@filename, "w+") do |file|
file.puts "This is a text file."
file.rewind
expect(OpenProject::ContentTypeDetector.new(file.path).detect).to eq("text/plain")
expect(described_class.new(file.path).detect).to start_with("text/plain")
end
FileUtils.rm @filename
end
@@ -59,35 +59,36 @@
require "spec_helper"
RSpec.describe OpenProject::FileCommandContentTypeDetector do
it "returns a content type based on the content of the file" do
it "returns a [mime_type, charset] tuple for a text file" do
tempfile = Tempfile.new("something")
tempfile.write("This is a file.")
tempfile.rewind
expect(described_class.new(tempfile.path).detect).to eq("text/plain")
mime, charset = described_class.new(tempfile.path).detect
expect(mime).to eq("text/plain")
expect(charset).to be_a(String)
tempfile.close
end
it "returns a sensible default when the file command is missing" do
it "returns [sensible_default, nil] when the file command is missing" do
allow(Open3).to receive(:capture2).and_raise "o noes!"
filename = "/path/to/something"
expect(described_class.new(filename).detect).to eq("application/binary")
expect(described_class.new("/path/to/something").detect).to eq(["application/binary", nil])
end
it "returns a sensible default on the odd chance that run returns nil" do
it "returns [sensible_default, nil] on the odd chance that run returns nil" do
allow(Open3).to receive(:capture2).and_return [nil, 0]
expect(described_class.new("windows").detect).to eq("application/binary")
expect(described_class.new("windows").detect).to eq(["application/binary", nil])
end
it "returns a sensible default when the file command returns an error code" do
it "returns [sensible_default, nil] when the file command returns an error code" do
allow(Open3).to receive(:capture2).and_return ["text/plain", 1]
expect(described_class.new("windows").detect).to eq("application/binary")
expect(described_class.new("windows").detect).to eq(["application/binary", nil])
end
it "returns a sensible default when the file command returns a type with parentheses" do
it "returns [sensible_default, nil] when the file command returns a type with parentheses" do
allow(Open3).to receive(:capture2).and_return ["text/plain (with something)", 0]
expect(described_class.new("windows").detect).to eq("application/binary")
expect(described_class.new("windows").detect).to eq(["application/binary", nil])
end
it "uses end-of-input delimiter to prevent command injection" do
@@ -97,4 +98,47 @@ RSpec.describe OpenProject::FileCommandContentTypeDetector do
expect(Open3).to have_received(:capture2).with("file", "-b", "--mime", "--", "--help")
end
describe "charset parsing edge cases" do
def detect(raw_output)
allow(Open3).to receive(:capture2).and_return [raw_output, 0]
described_class.new("any").detect
end
it "extracts charset when followed by an extra unknown parameter" do
expect(detect("text/plain; charset=utf-8; taste=banana")).to eq(["text/plain", "utf-8"])
end
it "only captures the charset token, not trailing content" do
expect(detect("text/plain; charset=utf-8;extra")).to eq(["text/plain", "utf-8"])
end
it "returns nil charset when charset= has no value" do
expect(detect("text/plain; charset=")).to eq(["text/plain", nil])
end
it "ignores unrecognised parameters before charset" do
expect(detect("text/plain; taste=banana; charset=iso-8859-1")).to eq(["text/plain", "iso-8859-1"])
end
end
describe "charset detection from real fixture files" do
let(:utf8_fixture) { Rails.root.join("spec/fixtures/encoding/utf-8.txt").to_s }
let(:iso8859_fixture) { Rails.root.join("spec/fixtures/encoding/iso-8859-1.txt").to_s }
let(:png_fixture) { Rails.root.join("spec/fixtures/files/image.png").to_s }
it "detects utf-8 charset for a UTF-8 encoded file" do
expect(described_class.new(utf8_fixture).detect).to eq(["text/plain", "utf-8"])
end
it "detects iso-8859-1 charset for an ISO-8859-1 encoded file" do
expect(described_class.new(iso8859_fixture).detect).to eq(["text/plain", "iso-8859-1"])
end
it "returns nil charset for non-text files" do
mime, charset = described_class.new(png_fixture).detect
expect(mime).to eq("image/png")
expect(charset).to be_nil
end
end
end
+58 -1
View File
@@ -252,7 +252,7 @@ RSpec.describe Attachment do
let(:author) { create(:user) }
let(:image_path) { Rails.root.join("spec/fixtures/files/image.png") }
let(:text_path) { Rails.root.join("spec/fixtures/files/testfile.txt") }
let(:text_path) { Rails.root.join("spec/fixtures/files/testfile-utf8.txt") }
let(:binary_path) { Rails.root.join("spec/fixtures/files/textfile.txt.gz") }
let(:image_attachment) { FogAttachment.new author:, file: File.open(image_path) }
@@ -320,6 +320,11 @@ RSpec.describe Attachment do
it_behaves_like "it uses content disposition inline" do
let(:attachment) { text_attachment }
end
it "includes response-content-type with text/plain and the detected charset in the S3 URL" do
url = text_attachment.external_url.to_s
expect(url).to include "response-content-type=text%2Fplain%3B%20charset%3D"
end
end
describe "for a video file" do
@@ -351,6 +356,58 @@ RSpec.describe Attachment do
expect(binary_attachment.content_disposition).to eq "attachment; filename=textfile.txt.gz"
expect(binary_attachment.external_url.to_s).to include "response-content-disposition=attachment"
end
it "includes response-content-type application/octet-stream in the S3 URL" do
expect(binary_attachment.external_url.to_s).to include "response-content-type=application%2Foctet-stream"
end
end
end
describe "#serving_content_type" do
subject(:attachment) { described_class.new(content_type:, charset:) }
let(:charset) { nil }
context "when a text file has a detected utf-8 charset (new upload)" do
let(:content_type) { "text/plain" }
let(:charset) { "utf-8" }
it "combines content_type and charset" do
expect(attachment.serving_content_type).to eq("text/plain; charset=utf-8")
end
end
context "when a text file has a non-UTF-8 charset (e.g. ISO-8859-1)" do
let(:content_type) { "text/plain" }
let(:charset) { "iso-8859-1" }
it "uses the stored charset" do
expect(attachment.serving_content_type).to eq("text/plain; charset=iso-8859-1")
end
end
context "when a text file has no charset stored (legacy upload)" do
let(:content_type) { "text/plain" }
it "falls back to Setting.attachment_default_charset so browsers do not default to ISO-8859-1" do
expect(attachment.serving_content_type).to eq("text/plain; charset=#{Setting.attachment_default_charset}")
end
end
context "when another text subtype has no charset stored" do
let(:content_type) { "text/x-ruby" }
it "falls back to Setting.attachment_default_charset" do
expect(attachment.serving_content_type).to eq("text/x-ruby; charset=#{Setting.attachment_default_charset}")
end
end
context "when the file is not a text type" do
let(:content_type) { "image/png" }
it "returns the content type unchanged" do
expect(attachment.serving_content_type).to eq("image/png")
end
end
end
@@ -486,19 +486,75 @@ RSpec.shared_examples "an APIv3 attachment resource", content_type: :json, type:
end
end
context "for a local text file" do
context "for a local text file (no stored charset, uses configured default)" do
it_behaves_like "for a local file" do
let(:expected_content_type) { "text/plain" }
let(:expected_content_type) { "text/plain; charset=#{Setting.attachment_default_charset}" }
let(:mock_file) { FileHelpers.mock_uploaded_file name: "foobar.txt" }
let(:content_disposition) { "inline; filename=foobar.txt" }
let(:attachment) do
att = create(:attachment, container:, file: mock_file, author: current_user)
att.file.store!
att.send :write_attribute, :file, mock_file.original_filename
att.send :write_attribute, :content_type, "text/plain"
att.send :write_attribute, :charset, nil
att.save!
att
end
end
end
context "for a local JS file" do
context "for a local JS file (normalised to text/plain, uses configured default charset)" do
it_behaves_like "for a local file" do
let(:expected_content_type) { "text/plain" }
let(:expected_content_type) { "text/plain; charset=#{Setting.attachment_default_charset}" }
let(:mock_file) { FileHelpers.mock_uploaded_file name: "foobar.js", content_type: "text/x-javascript" }
let(:content_disposition) { "inline; filename=foobar.js" }
let(:attachment) do
att = create(:attachment, container:, file: mock_file, author: current_user)
att.file.store!
att.send :write_attribute, :file, mock_file.original_filename
att.send :write_attribute, :content_type, "text/x-javascript"
att.send :write_attribute, :charset, nil
att.save!
att
end
end
end
context "for a local UTF-8 text file" do
it_behaves_like "for a local file" do
let(:expected_content_type) { "text/plain; charset=utf-8" }
let(:mock_file) { FileHelpers.mock_uploaded_file name: "foobar.txt" }
let(:content_disposition) { "inline; filename=foobar.txt" }
let(:attachment) do
att = create(:attachment, container:, file: mock_file, author: current_user)
att.file.store!
att.send :write_attribute, :file, mock_file.original_filename
att.send :write_attribute, :content_type, "text/plain"
att.send :write_attribute, :charset, "utf-8"
att.save!
att
end
end
end
context "for a local ISO-8859-1 text file" do
it_behaves_like "for a local file" do
let(:expected_content_type) { "text/plain; charset=iso-8859-1" }
let(:mock_file) do
FileHelpers.mock_uploaded_file name: "iso.txt",
content: Rails.root.join("spec/fixtures/encoding/iso-8859-1.txt").binread,
binary: true
end
let(:content_disposition) { "inline; filename=iso.txt" }
let(:attachment) do
att = create(:attachment, container:, file: mock_file, author: current_user)
att.file.store!
att.send :write_attribute, :file, mock_file.original_filename
att.send :write_attribute, :content_type, "text/plain"
att.send :write_attribute, :charset, "iso-8859-1"
att.save!
att
end
end
end