mirror of
https://github.com/opf/openproject.git
synced 2026-06-14 03:30:14 +00:00
also guard against invalid UTF-8 in input
This commit is contained in:
@@ -46,10 +46,16 @@ module Import
|
||||
)
|
||||
|
||||
def initialize(text)
|
||||
@text = text.dup
|
||||
# Normalize any input into a safe, mutable UTF-8 string: nil becomes "",
|
||||
# and invalid byte sequences are dropped so downstream regex/StringScanner
|
||||
# operations cannot raise ArgumentError on malformed input.
|
||||
@text = text.to_s.dup
|
||||
@text.scrub!("") unless @text.valid_encoding?
|
||||
end
|
||||
|
||||
def parse
|
||||
return N::Document.new(children: []) if @text.blank?
|
||||
|
||||
preprocess
|
||||
blocks = parse_blocks
|
||||
N::Document.new(children: blocks)
|
||||
|
||||
@@ -35,8 +35,6 @@ module Import
|
||||
end
|
||||
|
||||
def convert
|
||||
return "" if @text.blank?
|
||||
|
||||
ast = JiraWikiMarkup::Parser.new(@text).parse
|
||||
JiraWikiMarkup::Renderer.new(ast).render
|
||||
end
|
||||
|
||||
@@ -102,8 +102,6 @@ module Import
|
||||
end
|
||||
|
||||
def collect_markup_mentions(text, mention_usernames)
|
||||
return if text.blank?
|
||||
|
||||
ast = JiraWikiMarkup::Parser.new(text).parse
|
||||
collect_mentions_from_node(ast, mention_usernames)
|
||||
end
|
||||
|
||||
@@ -51,6 +51,38 @@ RSpec.describe Import::JiraWikiMarkupConverter do
|
||||
|
||||
it { is_expected.to eq("This is not {code} and not [a link]") }
|
||||
end
|
||||
|
||||
context "with invalid UTF-8 byte sequences in the input" do
|
||||
it "drops a stray invalid byte and keeps the surrounding text" do
|
||||
input = "Hello \xFF world".dup
|
||||
expect(input.valid_encoding?).to be(false)
|
||||
expect(described_class.new(input).convert).to eq("Hello world")
|
||||
end
|
||||
|
||||
it "drops a stray continuation byte" do
|
||||
input = "abc \x80 def".dup
|
||||
expect(input.valid_encoding?).to be(false)
|
||||
expect(described_class.new(input).convert).to eq("abc def")
|
||||
end
|
||||
|
||||
it "drops a truncated multi-byte sequence" do
|
||||
input = "pre \xC3 post".dup
|
||||
expect(input.valid_encoding?).to be(false)
|
||||
expect(described_class.new(input).convert).to eq("pre post")
|
||||
end
|
||||
|
||||
it "preserves valid multi-byte characters while dropping only the invalid byte" do
|
||||
input = "héllo \xFF world".dup
|
||||
expect(input.valid_encoding?).to be(false)
|
||||
expect(described_class.new(input).convert).to eq("héllo world")
|
||||
end
|
||||
|
||||
it "still parses formatting around invalid bytes inside delimiters" do
|
||||
input = "*bold\xFFtext*".dup
|
||||
expect(input.valid_encoding?).to be(false)
|
||||
expect(described_class.new(input).convert).to eq("**boldtext**")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "line ending normalization" do
|
||||
|
||||
Reference in New Issue
Block a user