Make xml to hash conversion faster by using Ox

The hash produced by Ox is very different from the one produced by
`Hash.from_xml`. A big part of the new code is about converting from Ox
hash format to ReXML hash format so that the rest of the code works
seamlessly. The conversion code is utterly complex. Sorry for that.
This commit is contained in:
Christophe Bliard
2023-04-05 17:45:33 +02:00
parent ce65870110
commit 75e5793bf8
5 changed files with 318 additions and 2 deletions
+1
View File
@@ -30,6 +30,7 @@ source 'https://rubygems.org'
ruby '~> 3.2.1'
gem 'ox'
gem 'actionpack-xml_parser', '~> 2.0.0'
gem 'activemodel-serializers-xml', '~> 1.0.1'
gem 'activerecord-import', '~> 1.4.0'
+2
View File
@@ -641,6 +641,7 @@ GEM
openproject-token (2.2.0)
activemodel
os (1.1.4)
ox (2.14.14)
paper_trail (12.3.0)
activerecord (>= 5.2)
request_store (~> 1.1)
@@ -1071,6 +1072,7 @@ DEPENDENCIES
openproject-webhooks!
openproject-xls_export!
overviews!
ox
paper_trail (~> 12.3)
parallel_tests (~> 4.0)
pg (~> 1.4.0)
@@ -0,0 +1,112 @@
# frozen_string_literal: true
#-- copyright
# OpenProject is an open source project management software.
# Copyright (C) 2012-2023 the OpenProject GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
# Copyright (C) 2006-2013 Jean-Philippe Lang
# Copyright (C) 2010-2013 the ChiliProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# See COPYRIGHT and LICENSE files for more details.
#++
module OpenProject::Bim::BcfJson
class FasterConverter
# Convert the xml to hash as `Hash.from_xml` would, faster.
class << self
def xml_to_hash(xml)
hash = Ox.load(xml, mode: :hash, symbolize_keys: false)
make_ox_hash_look_like_loaded_by_rexml(hash)
end
private
def make_ox_item_look_like_loaded_by_rexml(item)
case item
when Hash
make_ox_hash_look_like_loaded_by_rexml(item)
when Array
make_ox_array_look_like_loaded_by_rexml(item)
else
item
end
end
def make_ox_hash_look_like_loaded_by_rexml(hash)
hash.transform_values! do |v|
make_ox_item_look_like_loaded_by_rexml(v)
end
end
def make_ox_array_look_like_loaded_by_rexml(array)
if array_of_single_hash?(array)
make_ox_hash_look_like_loaded_by_rexml(array[0])
elsif array_of_hashes?(array)
if array_of_similar_hashes?(array)
# array of values like [{x: 1, y: 2}, {x: -3, y: 12}, ...]
array.map! { make_ox_hash_look_like_loaded_by_rexml(_1) }
else
hash = merge_hashes_together(array)
make_ox_hash_look_like_loaded_by_rexml(hash)
end
else
array.map! do |item|
make_ox_item_look_like_loaded_by_rexml(item)
end
end
end
def array_of_single_hash?(array)
array.size == 1 && array[0].is_a?(Hash)
end
def array_of_hashes?(array)
array.all? { _1.is_a?(Hash) }
end
def array_of_similar_hashes?(array)
return false if array.empty?
return false unless array[0].is_a?(Hash)
keys = array[0].keys
size = keys.size
array.all? { |h| h.is_a?(Hash) && h.size == size && h.keys == keys }
end
def merge_hashes_together(array)
head, *tail = array
head.merge!(*tail) do |_key, oldval, newval|
case oldval
when Array then oldval << unwrap(newval)
else [oldval, newval]
end
end
end
def unwrap(value)
case value
in [element] then element
else value
end
end
end
end
end
@@ -1,3 +1,5 @@
# frozen_string_literal: true
require 'bigdecimal'
module OpenProject::Bim
@@ -26,8 +28,7 @@ module OpenProject::Bim
# Retrieve the viewpoint hash without root node, if any.
def viewpoint_hash
@viewpoint_hash ||= begin
# Load from XML using activesupport
hash = Hash.from_xml(xml)
hash = FasterConverter.xml_to_hash(xml)
hash = hash[ROOT_NODE] if hash[ROOT_NODE]
# Perform destructive transformations
@@ -0,0 +1,200 @@
# frozen_string_literal: true
#-- copyright
# OpenProject is an open source project management software.
# Copyright (C) 2012-2023 the OpenProject GmbH
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
# Copyright (C) 2006-2013 Jean-Philippe Lang
# Copyright (C) 2010-2013 the ChiliProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# See COPYRIGHT and LICENSE files for more details.
#++
require 'spec_helper'
RSpec.describe OpenProject::Bim::BcfJson::FasterConverter do
def pps(hash)
PP.pp(hash, +'')
end
describe '.xml_to_hash' do
it 'deals with single tag without params' do
xml = <<~XML
<Component />
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq("Component" => nil)
end
it 'deals with single tag with params' do
xml = <<~XML
<Component IfcGuid="3_LMnKT5PDNvFeWnu3ys5Q" />
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq("Component" => { "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q" })
end
it 'deals with single tag with inner tags' do
xml = <<~XML
<Component IfcGuid="3_LMnKT5PDNvFeWnu3ys5Q">
<OriginatingSystem>Revit</OriginatingSystem>
<AuthoringToolId>1110420</AuthoringToolId>
</Component>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq(
"Component" => {
"IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q",
"OriginatingSystem" => "Revit",
"AuthoringToolId" => "1110420"
}
)
end
it 'deals with multiple similar tags with inner tags' do
xml = <<~XML
<root>
<Component IfcGuid="3_LMnKT5PDNvFeWnu3ys5Q">
<OriginatingSystem>Revit</OriginatingSystem>
<AuthoringToolId>1110420</AuthoringToolId>
</Component>
<Component IfcGuid="3_LMnKT5PDNvFeWnu3ysAc">
<OriginatingSystem>Revit</OriginatingSystem>
<AuthoringToolId>1110632</AuthoringToolId>
</Component>
</root>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq(
"root" => {
"Component" => [
{ "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q", "OriginatingSystem" => "Revit", "AuthoringToolId" => "1110420" },
{ "IfcGuid" => "3_LMnKT5PDNvFeWnu3ysAc", "OriginatingSystem" => "Revit", "AuthoringToolId" => "1110632" }
]
}
)
end
it 'deals with outer tag + multiple similar inner tags' do
xml = <<~XML
<Color>
<Component />
<Component />
</Color>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq("Color" => { "Component" => [nil, nil] })
end
it 'deals with outer tag with params + multiple similar inner tags' do
xml = <<~XML
<Color Color="3498db">
<Component />
<Component />
</Color>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq("Color" => { "Color" => "3498db", "Component" => [nil, nil] })
end
it 'deals with outer tag with params + multiple similar inner tags with params' do
xml = <<~XML
<Color Color="3498db">
<Component IfcGuid="3_LMnKT5PDNvFeWnu3ys5Q"/>
<Component IfcGuid="3_LMnKT5PDNvFeWnu3ysAc"/>
</Color>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq(
"Color" => {
"Color" => "3498db",
"Component" => [
{ "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q" },
{ "IfcGuid" => "3_LMnKT5PDNvFeWnu3ysAc" }
]
}
)
end
it 'deals with outer tag + multiple similar inner tags with inner tags' do
xml = <<~XML
<Color>
<Component>
<AuthoringToolId>id:1</AuthoringToolId>
</Component>
<Component>
<AuthoringToolId>id:2</AuthoringToolId>
</Component>
</Color>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq(
"Color" => {
"Component" => [
{ "AuthoringToolId" => "id:1" },
{ "AuthoringToolId" => "id:2" }
]
}
)
end
it 'deals with outer tag + mixed similar inner tags' do
xml = <<~XML
<Color>
<Component IfcGuid="guid:1"/>
<Component>
<AuthoringToolId>id:2</AuthoringToolId>
</Component>
<Component IfcGuid="guid:3"/>
</Color>
XML
expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation'
expect(described_class.xml_to_hash(xml))
.to eq(
"Color" => {
"Component" => [
{ "IfcGuid" => "guid:1" },
{ "AuthoringToolId" => "id:2" },
{ "IfcGuid" => "guid:3" }
]
}
)
end
# real data tests
Rails.root.glob("modules/bim/spec/fixtures/viewpoints/*.xml").each do |xml_file|
it "converts #{xml_file.basename} like Hash.from_xml() would" do
xml = xml_file.read
fast_hash = pps(described_class.xml_to_hash(xml))
slow_hash = pps(Hash.from_xml(xml))
expect(fast_hash).to eq(slow_hash)
end
end
end
end