mirror of
https://github.com/opf/openproject.git
synced 2026-06-13 19:20:00 +00:00
Extract and use charset to properly encode attachments
This commit is contained in:
@@ -36,6 +36,7 @@ module Attachments
|
||||
attribute :digest
|
||||
attribute :description
|
||||
attribute :content_type
|
||||
attribute :charset
|
||||
attribute :container
|
||||
attribute :container_type
|
||||
attribute :author
|
||||
|
||||
@@ -119,6 +119,17 @@ class Attachment < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
# Returns the content type to use when serving the file to a browser.
|
||||
# For text files, ensures a charset is always present so browsers don't
|
||||
# fall back to ISO-8859-1.
|
||||
# We use a configurable fallback (default utf-8) so that administrators
|
||||
# can control content types for previously uploaded attachments
|
||||
def serving_content_type
|
||||
return content_type unless is_text?
|
||||
|
||||
"#{content_type}; charset=#{charset.presence || Setting.attachment_default_charset}"
|
||||
end
|
||||
|
||||
def visible?(user = User.current)
|
||||
allowed_or_author?(user) do
|
||||
container.attachments_visible?(user)
|
||||
@@ -227,7 +238,7 @@ class Attachment < ApplicationRecord
|
||||
end
|
||||
|
||||
def set_content_type(file)
|
||||
self.content_type = self.class.content_type_for(file.path)
|
||||
self.content_type, self.charset = OpenProject::ContentTypeDetector.new(file.path).detect_with_charset
|
||||
end
|
||||
|
||||
def set_digest(file)
|
||||
|
||||
@@ -81,6 +81,11 @@ module Settings
|
||||
organization_name: {
|
||||
default: "My Organization"
|
||||
},
|
||||
attachment_default_charset: {
|
||||
description: "Fallback charset used when serving text attachments whose encoding was not detected on upload",
|
||||
format: :string,
|
||||
default: "utf-8"
|
||||
},
|
||||
attachment_max_size: {
|
||||
default: 5120
|
||||
},
|
||||
|
||||
@@ -1808,6 +1808,7 @@ en:
|
||||
attachment:
|
||||
attachment_content: "Attachment content"
|
||||
attachment_file_name: "Attachment file name"
|
||||
charset: "Character set"
|
||||
content_type: "Content-type"
|
||||
downloads: "Downloads"
|
||||
file: "File"
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
#-- copyright
|
||||
# OpenProject is an open source project management software.
|
||||
# Copyright (C) the OpenProject GmbH
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License version 3.
|
||||
#
|
||||
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
|
||||
# Copyright (C) 2006-2013 Jean-Philippe Lang
|
||||
# Copyright (C) 2010-2013 the ChiliProject Team
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# See COPYRIGHT and LICENSE files for more details.
|
||||
#++
|
||||
|
||||
class AddCharsetToAttachments < ActiveRecord::Migration[8.0]
|
||||
def change
|
||||
add_column :attachments, :charset, :text
|
||||
end
|
||||
end
|
||||
@@ -103,7 +103,8 @@ module API
|
||||
if attachment.is_text?
|
||||
# Even if the text mime type might differ, always output plain text
|
||||
# so this doesn't get interpreted as e.g., a script or html file
|
||||
"text/plain"
|
||||
charset = attachment.charset.presence || Setting.attachment_default_charset
|
||||
"text/plain; charset=#{charset}"
|
||||
elsif attachment.inlineable?
|
||||
attachment.content_type
|
||||
else
|
||||
|
||||
@@ -92,19 +92,19 @@ module OpenProject
|
||||
@filename = filename
|
||||
end
|
||||
|
||||
# Returns a String describing the file's content type
|
||||
def detect
|
||||
if blank_name?
|
||||
SENSIBLE_DEFAULT
|
||||
elsif empty_file?
|
||||
EMPTY_TYPE
|
||||
elsif calculated_type_matches.any?
|
||||
calculated_type_matches.first
|
||||
else
|
||||
type_from_file_command || SENSIBLE_DEFAULT
|
||||
end.to_s
|
||||
# Returns [mime_type, charset_or_nil], running the file command once.
|
||||
def detect_with_charset
|
||||
return [SENSIBLE_DEFAULT, nil] if blank_name?
|
||||
return [EMPTY_TYPE, nil] if empty_file?
|
||||
|
||||
raw_mime, charset = FileCommandContentTypeDetector.new(@filename).detect
|
||||
[resolve_mime(raw_mime), charset]
|
||||
end
|
||||
|
||||
# Detecting only the mime type is effectively the
|
||||
# first argument of +detect_with_charset+
|
||||
def detect = detect_with_charset.first
|
||||
|
||||
private
|
||||
|
||||
def empty_file?
|
||||
@@ -121,12 +121,9 @@ module OpenProject
|
||||
MIME::Types.type_for(@filename).map(&:content_type)
|
||||
end
|
||||
|
||||
def calculated_type_matches
|
||||
possible_types.select { |content_type| content_type == type_from_file_command }
|
||||
end
|
||||
|
||||
def type_from_file_command
|
||||
@type_from_file_command ||= FileCommandContentTypeDetector.new(@filename).detect
|
||||
def resolve_mime(raw_mime)
|
||||
matches = possible_types.select { |ct| ct == raw_mime }
|
||||
(matches.first || raw_mime || SENSIBLE_DEFAULT).to_s
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -69,23 +69,31 @@ module OpenProject
|
||||
@filename = filename
|
||||
end
|
||||
|
||||
# Returns [mime_type, charset_or_nil], e.g.:
|
||||
# ["text/plain", "utf-8"]
|
||||
# ["image/png", nil]
|
||||
def detect
|
||||
type_from_file_command
|
||||
@detect ||= parse_file_command
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def type_from_file_command
|
||||
def parse_file_command
|
||||
# On BSDs, `file` doesn't give a result code of 1 if the file doesn't exist.
|
||||
type, status = Open3.capture2("file", "-b", "--mime", "--", @filename)
|
||||
return [SENSIBLE_DEFAULT, nil] if type.nil? || status.to_i > 0 || type.match(/\(.*?\)/)
|
||||
|
||||
if type.nil? || status.to_i > 0 || type.match(/\(.*?\)/)
|
||||
type = SENSIBLE_DEFAULT
|
||||
end
|
||||
type.split(/[:;\s]+/)[0]
|
||||
extract_mime_and_charset(type.strip)
|
||||
rescue StandardError => e
|
||||
Rails.logger.info { "Failed to get mime type from #{@filename}: #{e} #{e.message}" }
|
||||
SENSIBLE_DEFAULT
|
||||
[SENSIBLE_DEFAULT, nil]
|
||||
end
|
||||
|
||||
def extract_mime_and_charset(type)
|
||||
mime, charset_param = type.split(";", 2).map(&:strip)
|
||||
charset = charset_param&.match(/\Acharset=(.+)\z/)&.[](1)
|
||||
charset = nil if charset == "binary"
|
||||
[mime, charset]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Vendored
+1
@@ -0,0 +1 @@
|
||||
UTF-8 encoded text with non-ASCII characters: äöü €
|
||||
@@ -90,7 +90,7 @@ RSpec.describe OpenProject::ContentTypeDetector do
|
||||
File.open(@filename, "w+") do |file|
|
||||
file.puts "This is a text file."
|
||||
file.rewind
|
||||
expect(OpenProject::ContentTypeDetector.new(file.path).detect).to eq("text/plain")
|
||||
expect(described_class.new(file.path).detect).to start_with("text/plain")
|
||||
end
|
||||
FileUtils.rm @filename
|
||||
end
|
||||
|
||||
@@ -59,35 +59,36 @@
|
||||
require "spec_helper"
|
||||
|
||||
RSpec.describe OpenProject::FileCommandContentTypeDetector do
|
||||
it "returns a content type based on the content of the file" do
|
||||
it "returns a [mime_type, charset] tuple for a text file" do
|
||||
tempfile = Tempfile.new("something")
|
||||
tempfile.write("This is a file.")
|
||||
tempfile.rewind
|
||||
|
||||
expect(described_class.new(tempfile.path).detect).to eq("text/plain")
|
||||
mime, charset = described_class.new(tempfile.path).detect
|
||||
expect(mime).to eq("text/plain")
|
||||
expect(charset).to be_a(String)
|
||||
|
||||
tempfile.close
|
||||
end
|
||||
|
||||
it "returns a sensible default when the file command is missing" do
|
||||
it "returns [sensible_default, nil] when the file command is missing" do
|
||||
allow(Open3).to receive(:capture2).and_raise "o noes!"
|
||||
filename = "/path/to/something"
|
||||
expect(described_class.new(filename).detect).to eq("application/binary")
|
||||
expect(described_class.new("/path/to/something").detect).to eq(["application/binary", nil])
|
||||
end
|
||||
|
||||
it "returns a sensible default on the odd chance that run returns nil" do
|
||||
it "returns [sensible_default, nil] on the odd chance that run returns nil" do
|
||||
allow(Open3).to receive(:capture2).and_return [nil, 0]
|
||||
expect(described_class.new("windows").detect).to eq("application/binary")
|
||||
expect(described_class.new("windows").detect).to eq(["application/binary", nil])
|
||||
end
|
||||
|
||||
it "returns a sensible default when the file command returns an error code" do
|
||||
it "returns [sensible_default, nil] when the file command returns an error code" do
|
||||
allow(Open3).to receive(:capture2).and_return ["text/plain", 1]
|
||||
expect(described_class.new("windows").detect).to eq("application/binary")
|
||||
expect(described_class.new("windows").detect).to eq(["application/binary", nil])
|
||||
end
|
||||
|
||||
it "returns a sensible default when the file command returns a type with parentheses" do
|
||||
it "returns [sensible_default, nil] when the file command returns a type with parentheses" do
|
||||
allow(Open3).to receive(:capture2).and_return ["text/plain (with something)", 0]
|
||||
expect(described_class.new("windows").detect).to eq("application/binary")
|
||||
expect(described_class.new("windows").detect).to eq(["application/binary", nil])
|
||||
end
|
||||
|
||||
it "uses end-of-input delimiter to prevent command injection" do
|
||||
@@ -97,4 +98,24 @@ RSpec.describe OpenProject::FileCommandContentTypeDetector do
|
||||
|
||||
expect(Open3).to have_received(:capture2).with("file", "-b", "--mime", "--", "--help")
|
||||
end
|
||||
|
||||
describe "charset detection from real fixture files" do
|
||||
let(:utf8_fixture) { Rails.root.join("spec/fixtures/encoding/utf-8.txt").to_s }
|
||||
let(:iso8859_fixture) { Rails.root.join("spec/fixtures/encoding/iso-8859-1.txt").to_s }
|
||||
let(:png_fixture) { Rails.root.join("spec/fixtures/files/image.png").to_s }
|
||||
|
||||
it "detects utf-8 charset for a UTF-8 encoded file" do
|
||||
expect(described_class.new(utf8_fixture).detect).to eq(["text/plain", "utf-8"])
|
||||
end
|
||||
|
||||
it "detects iso-8859-1 charset for an ISO-8859-1 encoded file" do
|
||||
expect(described_class.new(iso8859_fixture).detect).to eq(["text/plain", "iso-8859-1"])
|
||||
end
|
||||
|
||||
it "returns nil charset for non-text files" do
|
||||
mime, charset = described_class.new(png_fixture).detect
|
||||
expect(mime).to eq("image/png")
|
||||
expect(charset).to be_nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -252,7 +252,7 @@ RSpec.describe Attachment do
|
||||
let(:author) { create(:user) }
|
||||
|
||||
let(:image_path) { Rails.root.join("spec/fixtures/files/image.png") }
|
||||
let(:text_path) { Rails.root.join("spec/fixtures/files/testfile.txt") }
|
||||
let(:text_path) { Rails.root.join("spec/fixtures/files/testfile-utf8.txt") }
|
||||
let(:binary_path) { Rails.root.join("spec/fixtures/files/textfile.txt.gz") }
|
||||
|
||||
let(:image_attachment) { FogAttachment.new author:, file: File.open(image_path) }
|
||||
@@ -354,6 +354,54 @@ RSpec.describe Attachment do
|
||||
end
|
||||
end
|
||||
|
||||
describe "#serving_content_type" do
|
||||
subject(:attachment) { described_class.new(content_type:, charset:) }
|
||||
|
||||
let(:charset) { nil }
|
||||
|
||||
context "when a text file has a detected utf-8 charset (new upload)" do
|
||||
let(:content_type) { "text/plain" }
|
||||
let(:charset) { "utf-8" }
|
||||
|
||||
it "combines content_type and charset" do
|
||||
expect(attachment.serving_content_type).to eq("text/plain; charset=utf-8")
|
||||
end
|
||||
end
|
||||
|
||||
context "when a text file has a non-UTF-8 charset (e.g. ISO-8859-1)" do
|
||||
let(:content_type) { "text/plain" }
|
||||
let(:charset) { "iso-8859-1" }
|
||||
|
||||
it "uses the stored charset" do
|
||||
expect(attachment.serving_content_type).to eq("text/plain; charset=iso-8859-1")
|
||||
end
|
||||
end
|
||||
|
||||
context "when a text file has no charset stored (legacy upload)" do
|
||||
let(:content_type) { "text/plain" }
|
||||
|
||||
it "falls back to Setting.attachment_default_charset so browsers do not default to ISO-8859-1" do
|
||||
expect(attachment.serving_content_type).to eq("text/plain; charset=#{Setting.attachment_default_charset}")
|
||||
end
|
||||
end
|
||||
|
||||
context "when another text subtype has no charset stored" do
|
||||
let(:content_type) { "text/x-ruby" }
|
||||
|
||||
it "falls back to Setting.attachment_default_charset" do
|
||||
expect(attachment.serving_content_type).to eq("text/x-ruby; charset=#{Setting.attachment_default_charset}")
|
||||
end
|
||||
end
|
||||
|
||||
context "when the file is not a text type" do
|
||||
let(:content_type) { "image/png" }
|
||||
|
||||
it "returns the content type unchanged" do
|
||||
expect(attachment.serving_content_type).to eq("image/png")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "virus scan job on commit" do
|
||||
shared_let(:work_package) { create(:work_package) }
|
||||
let(:created_attachment) do
|
||||
|
||||
@@ -486,19 +486,75 @@ RSpec.shared_examples "an APIv3 attachment resource", content_type: :json, type:
|
||||
end
|
||||
end
|
||||
|
||||
context "for a local text file" do
|
||||
context "for a local text file (no stored charset, uses configured default)" do
|
||||
it_behaves_like "for a local file" do
|
||||
let(:expected_content_type) { "text/plain" }
|
||||
let(:expected_content_type) { "text/plain; charset=#{Setting.attachment_default_charset}" }
|
||||
let(:mock_file) { FileHelpers.mock_uploaded_file name: "foobar.txt" }
|
||||
let(:content_disposition) { "inline; filename=foobar.txt" }
|
||||
let(:attachment) do
|
||||
att = create(:attachment, container:, file: mock_file, author: current_user)
|
||||
att.file.store!
|
||||
att.send :write_attribute, :file, mock_file.original_filename
|
||||
att.send :write_attribute, :content_type, "text/plain"
|
||||
att.send :write_attribute, :charset, nil
|
||||
att.save!
|
||||
att
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "for a local JS file" do
|
||||
context "for a local JS file (normalised to text/plain, uses configured default charset)" do
|
||||
it_behaves_like "for a local file" do
|
||||
let(:expected_content_type) { "text/plain" }
|
||||
let(:expected_content_type) { "text/plain; charset=#{Setting.attachment_default_charset}" }
|
||||
let(:mock_file) { FileHelpers.mock_uploaded_file name: "foobar.js", content_type: "text/x-javascript" }
|
||||
let(:content_disposition) { "inline; filename=foobar.js" }
|
||||
let(:attachment) do
|
||||
att = create(:attachment, container:, file: mock_file, author: current_user)
|
||||
att.file.store!
|
||||
att.send :write_attribute, :file, mock_file.original_filename
|
||||
att.send :write_attribute, :content_type, "text/x-javascript"
|
||||
att.send :write_attribute, :charset, nil
|
||||
att.save!
|
||||
att
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "for a local UTF-8 text file" do
|
||||
it_behaves_like "for a local file" do
|
||||
let(:expected_content_type) { "text/plain; charset=utf-8" }
|
||||
let(:mock_file) { FileHelpers.mock_uploaded_file name: "foobar.txt" }
|
||||
let(:content_disposition) { "inline; filename=foobar.txt" }
|
||||
let(:attachment) do
|
||||
att = create(:attachment, container:, file: mock_file, author: current_user)
|
||||
att.file.store!
|
||||
att.send :write_attribute, :file, mock_file.original_filename
|
||||
att.send :write_attribute, :content_type, "text/plain"
|
||||
att.send :write_attribute, :charset, "utf-8"
|
||||
att.save!
|
||||
att
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "for a local ISO-8859-1 text file" do
|
||||
it_behaves_like "for a local file" do
|
||||
let(:expected_content_type) { "text/plain; charset=iso-8859-1" }
|
||||
let(:mock_file) do
|
||||
FileHelpers.mock_uploaded_file name: "iso.txt",
|
||||
content: Rails.root.join("spec/fixtures/encoding/iso-8859-1.txt").binread,
|
||||
binary: true
|
||||
end
|
||||
let(:content_disposition) { "inline; filename=iso.txt" }
|
||||
let(:attachment) do
|
||||
att = create(:attachment, container:, file: mock_file, author: current_user)
|
||||
att.file.store!
|
||||
att.send :write_attribute, :file, mock_file.original_filename
|
||||
att.send :write_attribute, :content_type, "text/plain"
|
||||
att.send :write_attribute, :charset, "iso-8859-1"
|
||||
att.save!
|
||||
att
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user