Gem name changed from 'text-extractor' to 'plaintext'

This commit is contained in:
Wieland Lindenthal
2018-02-15 17:15:00 +01:00
parent 68d4f36a15
commit 90ba46d406
9 changed files with 58 additions and 21 deletions
+1 -1
View File
@@ -90,5 +90,5 @@ npm-debug.log*
/frontend/dist/
/frontend/tests/*.gif
node_modules/
text_extractor.yml
plaintext.yml
structure.sql
+1 -2
View File
@@ -163,8 +163,7 @@ gem 'aws-sdk', '~> 2.10.1'
gem 'openproject-token', '~> 1.0.1'
gem 'text-extractor', '0.1.0'
gem 'plaintext', '0.1.0'
group :test do
gem 'rack-test', '~> 0.6.3'
+5 -5
View File
@@ -383,6 +383,10 @@ GEM
rake (>= 0.8.1)
pdf-core (0.7.0)
pg (0.21.0)
plaintext (0.1.0)
activesupport (> 2.2.1)
nokogiri (~> 1.8.1)
rubyzip (~> 1.2.1)
powerpack (0.1.1)
prawn (2.2.2)
pdf-core (~> 0.7.0)
@@ -559,10 +563,6 @@ GEM
sys-filesystem (1.1.8)
ffi
test-prof (0.1.0)
text-extractor (0.1.0)
activesupport (> 2.2.1)
nokogiri (~> 1.8.1)
rubyzip (~> 1.2.1)
thin (1.7.2)
daemons (~> 1.0, >= 1.0.9)
eventmachine (~> 1.0, >= 1.0.4)
@@ -674,6 +674,7 @@ DEPENDENCIES
parallel_tests (~> 2.14.0)
passenger
pg (~> 0.21.0)
plaintext (= 0.1.0)
prawn (~> 2.2)
prawn-table (~> 0.2.2)
pry-byebug (~> 3.4.2)
@@ -720,7 +721,6 @@ DEPENDENCIES
syck (~> 1.3.0)
sys-filesystem (~> 1.1.4)
test-prof
text-extractor (= 0.1.0)
thin (~> 1.7.2)
timecop (~> 0.9.0)
transactional_lock!
+6 -6
View File
@@ -77,12 +77,12 @@ class AdminController < ApplicationController
@checklist = [
[:text_default_administrator_account_changed, User.default_admin_account_changed?],
[:text_file_repository_writable, repository_writable],
[:'extraction.available.pdftotext', TextExtractor::PdfHandler.available?],
[:'extraction.available.unrtf', TextExtractor::RtfHandler.available?],
[:'extraction.available.catdoc', TextExtractor::DocHandler.available?],
[:'extraction.available.xls2csv', TextExtractor::XlsHandler.available?],
[:'extraction.available.catppt', TextExtractor::PptHandler.available?],
[:'extraction.available.tesseract', TextExtractor::ImageHandler.available?]
[:'extraction.available.pdftotext', Plaintext::PdfHandler.available?],
[:'extraction.available.unrtf', Plaintext::RtfHandler.available?],
[:'extraction.available.catdoc', Plaintext::DocHandler.available?],
[:'extraction.available.xls2csv', Plaintext::XlsHandler.available?],
[:'extraction.available.catppt', Plaintext::PptHandler.available?],
[:'extraction.available.tesseract', Plaintext::ImageHandler.available?]
]
@storage_information = OpenProject::Storage.mount_information
+1 -1
View File
@@ -55,7 +55,7 @@ class ExtractFulltextJob < ApplicationJob
begin
if @attachment.readable?
resolver = TextExtractor::Resolver.new(@file, @attachment.content_type)
resolver = Plaintext::Resolver.new(@file, @attachment.content_type)
@text = resolver.text
end
rescue => e
@@ -27,8 +27,8 @@
# See doc/COPYRIGHT.rdoc for more details.
#++
file_name = File.join([Rails.root.to_s, 'config', 'text_extractor.yml'])
file_name = File.join([Rails.root.to_s, 'config', 'plaintext.yml'])
if File.file?(file_name)
config_file = File.read(file_name)
TextExtractor::Configuration.load(config_file)
Plaintext::Configuration.load(config_file)
end
+38
View File
@@ -0,0 +1,38 @@
# Text extraction helper programs.
#
# Each top-level key below maps to a list: the path of the program to run,
# followed by its arguments, one list item per argument.
# Commands should write the resulting plain text to STDOUT. Use __FILE__ as a
# placeholder for the file path. The values below are the defaults.
# NOTE(review): the active entries point at /usr/local/bin and at textutil,
# which look machine-specific -- confirm they really match the defaults.
# apt install poppler-utils
pdftotext:
- /usr/local/bin/pdftotext
- -enc
- UTF-8
- __FILE__
# presumably '-' makes pdftotext print to STDOUT instead of a file -- TODO confirm
- '-'
# apt install unrtf
unrtf:
- /usr/local/bin/unrtf
- --text
- __FILE__
# No install hint was given for tesseract.
# presumably the trailing `stdout` argument selects STDOUT output -- TODO confirm
tesseract:
- /usr/local/bin/tesseract
- __FILE__
- stdout
# apt install catdoc
# NOTE(review): despite the key name and the install hint above, this command
# runs /usr/bin/textutil, not a catdoc binary -- confirm this is intended.
catdoc:
- /usr/bin/textutil
- -convert
- txt
- -stdout
- __FILE__
# The xls2csv and catppt entries below are commented out.
# xls2csv:
# - /usr/bin/xls2csv
# - -dutf-8
# - __FILE__
# catppt:
# - /usr/bin/catppt
# - -dutf-8
# - __FILE__
@@ -263,10 +263,10 @@ describe 'filter work packages', js: true do
allow(EnterpriseToken).to receive(:allows_to?).and_return(false)
allow(EnterpriseToken).to receive(:allows_to?).with(:attachment_filters).and_return(true)
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_return('I am the first text $1.99.')
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return('I am the first text $1.99.')
wp_with_attachment_a
ExtractFulltextJob.new(attachment_a.id).perform
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_return('I am the second text.')
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return('I am the second text.')
wp_with_attachment_b
ExtractFulltextJob.new(attachment_b.id).perform
wp_without_attachment
+2 -2
View File
@@ -38,7 +38,7 @@ describe ExtractFulltextJob, type: :job do
context "with successful text extraction" do
before do
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_return(text)
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return(text)
end
context 'attachment is readable' do
@@ -91,7 +91,7 @@ describe ExtractFulltextJob, type: :job do
let(:logger) { Rails.logger }
before do
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_raise(exception_message)
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_raise(exception_message)
# This line is actually part of the test. `expect` call needs to go so far up here, as we want to verify that a message gets logged.
expect(logger).to receive(:error).with(exception_message)