From a852d46cb6f17fd2453ee4690a7bba185a627b2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Oliver=20G=C3=BCnther?=
Date: Fri, 29 May 2026 10:30:07 +0200
Subject: [PATCH] Be more cautious when parsing charset from `file`

---
 .../file_command_content_type_detector.rb     |  7 ++++--
 ...file_command_content_type_detector_spec.rb | 23 +++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/lib/open_project/file_command_content_type_detector.rb b/lib/open_project/file_command_content_type_detector.rb
index f491e17dc0b..f25ee999c97 100644
--- a/lib/open_project/file_command_content_type_detector.rb
+++ b/lib/open_project/file_command_content_type_detector.rb
@@ -90,8 +90,11 @@ module OpenProject
     end
 
     def extract_mime_and_charset(type)
-      mime, charset_param = type.split(";", 2).map(&:strip)
-      charset = charset_param&.match(/\Acharset=(.+)\z/)&.[](1)
+      parts = type.split(";").map(&:strip)
+      mime = parts.first
+      charset = parts.drop(1)
+                     .filter_map { |p| p.match(/\Acharset=([^\s;]+)\z/)&.[](1) }
+                     .first
       charset = nil if charset == "binary"
       [mime, charset]
     end
diff --git a/spec/lib/open_project/file_command_content_type_detector_spec.rb b/spec/lib/open_project/file_command_content_type_detector_spec.rb
index 4f444643932..a20c848b0ca 100644
--- a/spec/lib/open_project/file_command_content_type_detector_spec.rb
+++ b/spec/lib/open_project/file_command_content_type_detector_spec.rb
@@ -99,6 +99,29 @@ RSpec.describe OpenProject::FileCommandContentTypeDetector do
     expect(Open3).to have_received(:capture2).with("file", "-b", "--mime", "--", "--help")
   end
 
+  describe "charset parsing edge cases" do
+    def detect(raw_output)
+      allow(Open3).to receive(:capture2).and_return [raw_output, 0]
+      described_class.new("any").detect
+    end
+
+    it "extracts charset when followed by an extra unknown parameter" do
+      expect(detect("text/plain; charset=utf-8; taste=banana")).to eq(["text/plain", "utf-8"])
+    end
+
+    it "only captures the charset token, not trailing content" do
+      expect(detect("text/plain; charset=utf-8;extra")).to eq(["text/plain", "utf-8"])
+    end
+
+    it "returns nil charset when charset= has no value" do
+      expect(detect("text/plain; charset=")).to eq(["text/plain", nil])
+    end
+
+    it "ignores unrecognised parameters before charset" do
+      expect(detect("text/plain; taste=banana; charset=iso-8859-1")).to eq(["text/plain", "iso-8859-1"])
+    end
+  end
+
   describe "charset detection from real fixture files" do
     let(:utf8_fixture) { Rails.root.join("spec/fixtures/encoding/utf-8.txt").to_s }
     let(:iso8859_fixture) { Rails.root.join("spec/fixtures/encoding/iso-8859-1.txt").to_s }