From 75e5793bf81362ec9062dd780b2655fca5bb8c24 Mon Sep 17 00:00:00 2001 From: Christophe Bliard Date: Wed, 5 Apr 2023 17:45:33 +0200 Subject: [PATCH] Make xml to hash conversion faster by using Ox The hash produced by Ox is very different from the one produced by `Hash.from_xml`. A big part of the new code is about converting from Ox hash format to ReXML hash format so that the rest of the code works seamlessly. The conversion code is utterly complex. Sorry for that. --- Gemfile | 1 + Gemfile.lock | 2 + .../bim/bcf_json/faster_converter.rb | 112 ++++++++++ .../bim/bcf_json/viewpoint_reader.rb | 5 +- .../bcf/bcf_json/faster_converter_spec.rb | 200 ++++++++++++++++++ 5 files changed, 318 insertions(+), 2 deletions(-) create mode 100644 modules/bim/lib/open_project/bim/bcf_json/faster_converter.rb create mode 100644 modules/bim/spec/lib/open_project/bcf/bcf_json/faster_converter_spec.rb diff --git a/Gemfile b/Gemfile index 58e371e9216..d552e6cffb6 100644 --- a/Gemfile +++ b/Gemfile @@ -30,6 +30,7 @@ source 'https://rubygems.org' ruby '~> 3.2.1' +gem 'ox' gem 'actionpack-xml_parser', '~> 2.0.0' gem 'activemodel-serializers-xml', '~> 1.0.1' gem 'activerecord-import', '~> 1.4.0' diff --git a/Gemfile.lock b/Gemfile.lock index 760f85fd39a..9b964bce642 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -641,6 +641,7 @@ GEM openproject-token (2.2.0) activemodel os (1.1.4) + ox (2.14.14) paper_trail (12.3.0) activerecord (>= 5.2) request_store (~> 1.1) @@ -1071,6 +1072,7 @@ DEPENDENCIES openproject-webhooks! openproject-xls_export! overviews! + ox paper_trail (~> 12.3) parallel_tests (~> 4.0) pg (~> 1.4.0) diff --git a/modules/bim/lib/open_project/bim/bcf_json/faster_converter.rb b/modules/bim/lib/open_project/bim/bcf_json/faster_converter.rb new file mode 100644 index 00000000000..8fa8eedd5aa --- /dev/null +++ b/modules/bim/lib/open_project/bim/bcf_json/faster_converter.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true + +#-- copyright +# OpenProject is an open source project management software. +# Copyright (C) 2012-2023 the OpenProject GmbH +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License version 3. +# +# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows: +# Copyright (C) 2006-2013 Jean-Philippe Lang +# Copyright (C) 2010-2013 the ChiliProject Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# See COPYRIGHT and LICENSE files for more details. +#++ + +module OpenProject::Bim::BcfJson + class FasterConverter + # Convert the xml to hash as `Hash.from_xml` would, faster. + class << self + def xml_to_hash(xml) + hash = Ox.load(xml, mode: :hash, symbolize_keys: false) + make_ox_hash_look_like_loaded_by_rexml(hash) + end + + private + + def make_ox_item_look_like_loaded_by_rexml(item) + case item + when Hash + make_ox_hash_look_like_loaded_by_rexml(item) + when Array + make_ox_array_look_like_loaded_by_rexml(item) + else + item + end + end + + def make_ox_hash_look_like_loaded_by_rexml(hash) + hash.transform_values! do |v| + make_ox_item_look_like_loaded_by_rexml(v) + end + end + + def make_ox_array_look_like_loaded_by_rexml(array) + if array_of_single_hash?(array) + make_ox_hash_look_like_loaded_by_rexml(array[0]) + elsif array_of_hashes?(array) + if array_of_similar_hashes?(array) + # array of values like [{x: 1, y: 2}, {x: -3, y: 12}, ...] + array.map! { make_ox_hash_look_like_loaded_by_rexml(_1) } + else + hash = merge_hashes_together(array) + make_ox_hash_look_like_loaded_by_rexml(hash) + end + else + array.map! do |item| + make_ox_item_look_like_loaded_by_rexml(item) + end + end + end + + def array_of_single_hash?(array) + array.size == 1 && array[0].is_a?(Hash) + end + + def array_of_hashes?(array) + array.all? { _1.is_a?(Hash) } + end + + def array_of_similar_hashes?(array) + return false if array.empty? + return false unless array[0].is_a?(Hash) + + keys = array[0].keys + size = keys.size + array.all? { |h| h.is_a?(Hash) && h.size == size && h.keys == keys } + end + + def merge_hashes_together(array) + head, *tail = array + head.merge!(*tail) do |_key, oldval, newval| + case oldval + when Array then oldval << unwrap(newval) + else [oldval, newval] + end + end + end + + def unwrap(value) + case value + in [element] then element + else value + end + end + end + end +end diff --git a/modules/bim/lib/open_project/bim/bcf_json/viewpoint_reader.rb b/modules/bim/lib/open_project/bim/bcf_json/viewpoint_reader.rb index 7a1025ee56d..dd764808163 100644 --- a/modules/bim/lib/open_project/bim/bcf_json/viewpoint_reader.rb +++ b/modules/bim/lib/open_project/bim/bcf_json/viewpoint_reader.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'bigdecimal' module OpenProject::Bim @@ -26,8 +28,7 @@ module OpenProject::Bim # Retrieve the viewpoint hash without root node, if any. def viewpoint_hash @viewpoint_hash ||= begin - # Load from XML using activesupport - hash = Hash.from_xml(xml) + hash = FasterConverter.xml_to_hash(xml) hash = hash[ROOT_NODE] if hash[ROOT_NODE] # Perform destructive transformations diff --git a/modules/bim/spec/lib/open_project/bcf/bcf_json/faster_converter_spec.rb b/modules/bim/spec/lib/open_project/bcf/bcf_json/faster_converter_spec.rb new file mode 100644 index 00000000000..694319db6ed --- /dev/null +++ b/modules/bim/spec/lib/open_project/bcf/bcf_json/faster_converter_spec.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +#-- copyright +# OpenProject is an open source project management software. +# Copyright (C) 2012-2023 the OpenProject GmbH +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License version 3. +# +# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows: +# Copyright (C) 2006-2013 Jean-Philippe Lang +# Copyright (C) 2010-2013 the ChiliProject Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# See COPYRIGHT and LICENSE files for more details. +#++ + +require 'spec_helper' + +RSpec.describe OpenProject::Bim::BcfJson::FasterConverter do + def pps(hash) + PP.pp(hash, +'') + end + + describe '.xml_to_hash' do + it 'deals with single tag without params' do + xml = <<~XML + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq("Component" => nil) + end + + it 'deals with single tag with params' do + xml = <<~XML + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq("Component" => { "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q" }) + end + + it 'deals with single tag with inner tags' do + xml = <<~XML + + Revit + 1110420 + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq( + "Component" => { + "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q", + "OriginatingSystem" => "Revit", + "AuthoringToolId" => "1110420" + } + ) + end + + it 'deals with multiple similar tags with inner tags' do + xml = <<~XML + + + Revit + 1110420 + + + Revit + 1110632 + + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq( + "root" => { + "Component" => [ + { "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q", "OriginatingSystem" => "Revit", "AuthoringToolId" => "1110420" }, + { "IfcGuid" => "3_LMnKT5PDNvFeWnu3ysAc", "OriginatingSystem" => "Revit", "AuthoringToolId" => "1110632" } + ] + } + ) + end + + it 'deals with outer tag + multiple similar inner tags' do + xml = <<~XML + + + + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq("Color" => { "Component" => [nil, nil] }) + end + + it 'deals with outer tag with params + multiple similar inner tags' do + xml = <<~XML + + + + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq("Color" => { "Color" => "3498db", "Component" => [nil, nil] }) + end + + it 'deals with outer tag with params + multiple similar inner tags with params' do + xml = <<~XML + + + + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq( + "Color" => { + "Color" => "3498db", + "Component" => [ + { "IfcGuid" => "3_LMnKT5PDNvFeWnu3ys5Q" }, + { "IfcGuid" => "3_LMnKT5PDNvFeWnu3ysAc" } + ] + } + ) + end + + it 'deals with outer tag + multiple similar inner tags with inner tags' do + xml = <<~XML + + + id:1 + + + id:2 + + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq( + "Color" => { + "Component" => [ + { "AuthoringToolId" => "id:1" }, + { "AuthoringToolId" => "id:2" } + ] + } + ) + end + + it 'deals with outer tag + mixed similar inner tags' do + xml = <<~XML + + + + id:2 + + + + XML + expect(described_class.xml_to_hash(xml)).to eq(Hash.from_xml(xml)), 'should be identical to reference implementation' + expect(described_class.xml_to_hash(xml)) + .to eq( + "Color" => { + "Component" => [ + { "IfcGuid" => "guid:1" }, + { "AuthoringToolId" => "id:2" }, + { "IfcGuid" => "guid:3" } + ] + } + ) + end + + # real data tests + Rails.root.glob("modules/bim/spec/fixtures/viewpoints/*.xml").each do |xml_file| + it "converts #{xml_file.basename} like Hash.from_xml() would" do + xml = xml_file.read + fast_hash = pps(described_class.xml_to_hash(xml)) + slow_hash = pps(Hash.from_xml(xml)) + expect(fast_hash).to eq(slow_hash) + end + end + end +end