summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2017-09-01 14:30:43 +0000
committerDouwe Maan <douwe@gitlab.com>2017-09-01 14:30:43 +0000
commitdceb2112d2ec34a947edfb2c9ec4d286fea4661a (patch)
treef1d2f5f7724a6560d88d2a848a0cb6f504aafdb1 /lib
parentba3cfd07dd9bd0de57239d5a748d98f783507d92 (diff)
parent4761235f6944d1627346ca835a192c1ed32f745e (diff)
downloadgitlab-ce-dceb2112d2ec34a947edfb2c9ec4d286fea4661a.tar.gz
Merge branch 'bvl-validate-po-files' into 'master'
Validate PO files in static analysis See merge request !13000
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/i18n/metadata_entry.rb27
-rw-r--r--lib/gitlab/i18n/po_linter.rb216
-rw-r--r--lib/gitlab/i18n/translation_entry.rb92
-rw-r--r--lib/gitlab/sentry.rb2
-rw-r--r--lib/gitlab/utils.rb4
-rw-r--r--lib/tasks/gettext.rake40
6 files changed, 381 insertions, 0 deletions
diff --git a/lib/gitlab/i18n/metadata_entry.rb b/lib/gitlab/i18n/metadata_entry.rb
new file mode 100644
index 00000000000..35d57459a3d
--- /dev/null
+++ b/lib/gitlab/i18n/metadata_entry.rb
@@ -0,0 +1,27 @@
+module Gitlab
+ module I18n
+ class MetadataEntry
+ attr_reader :entry_data
+
+ def initialize(entry_data)
+ @entry_data = entry_data
+ end
+
+ def expected_plurals
+ return nil unless plural_information
+
+ plural_information['nplurals'].to_i
+ end
+
+ private
+
+ def plural_information
+ return @plural_information if defined?(@plural_information)
+
+ if plural_line = entry_data[:msgstr].detect { |metadata_line| metadata_line.starts_with?('Plural-Forms: ') }
+ @plural_information = Hash[plural_line.scan(/(\w+)=([^;\n]+)/)]
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/i18n/po_linter.rb b/lib/gitlab/i18n/po_linter.rb
new file mode 100644
index 00000000000..2e02787a4f4
--- /dev/null
+++ b/lib/gitlab/i18n/po_linter.rb
@@ -0,0 +1,216 @@
+require 'simple_po_parser'
+
+module Gitlab
+ module I18n
+ class PoLinter
+ attr_reader :po_path, :translation_entries, :metadata_entry, :locale
+
+ VARIABLE_REGEX = /%{\w*}|%[a-z]/.freeze
+
+ def initialize(po_path, locale = I18n.locale.to_s)
+ @po_path = po_path
+ @locale = locale
+ end
+
+ def errors
+ @errors ||= validate_po
+ end
+
+ def validate_po
+ if parse_error = parse_po
+ return 'PO-syntax errors' => [parse_error]
+ end
+
+ validate_entries
+ end
+
+ def parse_po
+ entries = SimplePoParser.parse(po_path)
+
+ # The first entry is the metadata entry if there is one.
+ # This is an entry when empty `msgid`
+ if entries.first[:msgid].empty?
+ @metadata_entry = Gitlab::I18n::MetadataEntry.new(entries.shift)
+ else
+ return 'Missing metadata entry.'
+ end
+
+ @translation_entries = entries.map do |entry_data|
+ Gitlab::I18n::TranslationEntry.new(entry_data, metadata_entry.expected_plurals)
+ end
+
+ nil
+ rescue SimplePoParser::ParserError => e
+ @translation_entries = []
+ e.message
+ end
+
+ def validate_entries
+ errors = {}
+
+ translation_entries.each do |entry|
+ errors_for_entry = validate_entry(entry)
+ errors[join_message(entry.msgid)] = errors_for_entry if errors_for_entry.any?
+ end
+
+ errors
+ end
+
+ def validate_entry(entry)
+ errors = []
+
+ validate_flags(errors, entry)
+ validate_variables(errors, entry)
+ validate_newlines(errors, entry)
+ validate_number_of_plurals(errors, entry)
+ validate_unescaped_chars(errors, entry)
+
+ errors
+ end
+
+ def validate_unescaped_chars(errors, entry)
+ if entry.msgid_contains_unescaped_chars?
+ errors << 'contains unescaped `%`, escape it using `%%`'
+ end
+
+ if entry.plural_id_contains_unescaped_chars?
+ errors << 'plural id contains unescaped `%`, escape it using `%%`'
+ end
+
+ if entry.translations_contain_unescaped_chars?
+ errors << 'translation contains unescaped `%`, escape it using `%%`'
+ end
+ end
+
+ def validate_number_of_plurals(errors, entry)
+ return unless metadata_entry&.expected_plurals
+ return unless entry.translated?
+
+ if entry.has_plural? && entry.all_translations.size != metadata_entry.expected_plurals
+ errors << "should have #{metadata_entry.expected_plurals} "\
+ "#{'translations'.pluralize(metadata_entry.expected_plurals)}"
+ end
+ end
+
+ def validate_newlines(errors, entry)
+ if entry.msgid_contains_newlines?
+ errors << 'is defined over multiple lines, this breaks some tooling.'
+ end
+
+ if entry.plural_id_contains_newlines?
+ errors << 'plural is defined over multiple lines, this breaks some tooling.'
+ end
+
+ if entry.translations_contain_newlines?
+ errors << 'has translations defined over multiple lines, this breaks some tooling.'
+ end
+ end
+
+ def validate_variables(errors, entry)
+ if entry.has_singular_translation?
+ validate_variables_in_message(errors, entry.msgid, entry.singular_translation)
+ end
+
+ if entry.has_plural?
+ entry.plural_translations.each do |translation|
+ validate_variables_in_message(errors, entry.plural_id, translation)
+ end
+ end
+ end
+
+ def validate_variables_in_message(errors, message_id, message_translation)
+ message_id = join_message(message_id)
+ required_variables = message_id.scan(VARIABLE_REGEX)
+
+ validate_unnamed_variables(errors, required_variables)
+ validate_translation(errors, message_id, required_variables)
+ validate_variable_usage(errors, message_translation, required_variables)
+ end
+
+ def validate_translation(errors, message_id, used_variables)
+ variables = fill_in_variables(used_variables)
+
+ begin
+ Gitlab::I18n.with_locale(locale) do
+ translated = if message_id.include?('|')
+ FastGettext::Translation.s_(message_id)
+ else
+ FastGettext::Translation._(message_id)
+ end
+
+ translated % variables
+ end
+
+ # `sprintf` could raise an `ArgumentError` when invalid passing something
+ # other than a Hash when using named variables
+ #
+ # `sprintf` could raise `TypeError` when passing a wrong type when using
+ # unnamed variables
+ #
+ # FastGettext::Translation could raise `RuntimeError` (raised as a string),
+ # or as subclassess `NoTextDomainConfigured` & `InvalidFormat`
+ #
+ # `FastGettext::Translation` could raise `ArgumentError` as subclassess
+ # `InvalidEncoding`, `IllegalSequence` & `InvalidCharacter`
+ rescue ArgumentError, TypeError, RuntimeError => e
+ errors << "Failure translating to #{locale} with #{variables}: #{e.message}"
+ end
+ end
+
+ def fill_in_variables(variables)
+ if variables.empty?
+ []
+ elsif variables.any? { |variable| unnamed_variable?(variable) }
+ variables.map do |variable|
+ variable == '%d' ? Random.rand(1000) : Gitlab::Utils.random_string
+ end
+ else
+ variables.inject({}) do |hash, variable|
+ variable_name = variable[/\w+/]
+ hash[variable_name] = Gitlab::Utils.random_string
+ hash
+ end
+ end
+ end
+
+ def validate_unnamed_variables(errors, variables)
+ if variables.size > 1 && variables.any? { |variable_name| unnamed_variable?(variable_name) }
+ errors << 'is combining multiple unnamed variables'
+ end
+ end
+
+ def validate_variable_usage(errors, translation, required_variables)
+ translation = join_message(translation)
+
+ # We don't need to validate when the message is empty.
+ # In this case we fall back to the default, which has all the the
+ # required variables.
+ return if translation.empty?
+
+ found_variables = translation.scan(VARIABLE_REGEX)
+
+ missing_variables = required_variables - found_variables
+ if missing_variables.any?
+ errors << "<#{translation}> is missing: [#{missing_variables.to_sentence}]"
+ end
+
+ unknown_variables = found_variables - required_variables
+ if unknown_variables.any?
+ errors << "<#{translation}> is using unknown variables: [#{unknown_variables.to_sentence}]"
+ end
+ end
+
+ def unnamed_variable?(variable_name)
+ !variable_name.start_with?('%{')
+ end
+
+ def validate_flags(errors, entry)
+ errors << "is marked #{entry.flag}" if entry.flag
+ end
+
+ def join_message(message)
+ Array(message).join
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/i18n/translation_entry.rb b/lib/gitlab/i18n/translation_entry.rb
new file mode 100644
index 00000000000..e6c95afca7e
--- /dev/null
+++ b/lib/gitlab/i18n/translation_entry.rb
@@ -0,0 +1,92 @@
+module Gitlab
+ module I18n
+ class TranslationEntry
+ PERCENT_REGEX = /(?:^|[^%])%(?!{\w*}|[a-z%])/.freeze
+
+ attr_reader :nplurals, :entry_data
+
+ def initialize(entry_data, nplurals)
+ @entry_data = entry_data
+ @nplurals = nplurals
+ end
+
+ def msgid
+ entry_data[:msgid]
+ end
+
+ def plural_id
+ entry_data[:msgid_plural]
+ end
+
+ def has_plural?
+ plural_id.present?
+ end
+
+ def singular_translation
+ all_translations.first if has_singular_translation?
+ end
+
+ def all_translations
+ @all_translations ||= entry_data.fetch_values(*translation_keys)
+ .reject(&:empty?)
+ end
+
+ def translated?
+ all_translations.any?
+ end
+
+ def plural_translations
+ return [] unless has_plural?
+ return [] unless translated?
+
+ @plural_translations ||= if has_singular_translation?
+ all_translations.drop(1)
+ else
+ all_translations
+ end
+ end
+
+ def flag
+ entry_data[:flag]
+ end
+
+ def has_singular_translation?
+ nplurals > 1 || !has_plural?
+ end
+
+ def msgid_contains_newlines?
+ msgid.is_a?(Array)
+ end
+
+ def plural_id_contains_newlines?
+ plural_id.is_a?(Array)
+ end
+
+ def translations_contain_newlines?
+ all_translations.any? { |translation| translation.is_a?(Array) }
+ end
+
+ def msgid_contains_unescaped_chars?
+ contains_unescaped_chars?(msgid)
+ end
+
+ def plural_id_contains_unescaped_chars?
+ contains_unescaped_chars?(plural_id)
+ end
+
+ def translations_contain_unescaped_chars?
+ all_translations.any? { |translation| contains_unescaped_chars?(translation) }
+ end
+
+ def contains_unescaped_chars?(string)
+ string =~ PERCENT_REGEX
+ end
+
+ private
+
+ def translation_keys
+ @translation_keys ||= entry_data.keys.select { |key| key.to_s =~ /\Amsgstr(\[\d+\])?\z/ }
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/sentry.rb b/lib/gitlab/sentry.rb
index f6bdd6cf0fe..159d0e7952e 100644
--- a/lib/gitlab/sentry.rb
+++ b/lib/gitlab/sentry.rb
@@ -9,6 +9,8 @@ module Gitlab
def self.context(current_user = nil)
return unless self.enabled?
+ Raven.tags_context(locale: I18n.locale)
+
if current_user
Raven.user_context(
id: current_user.id,
diff --git a/lib/gitlab/utils.rb b/lib/gitlab/utils.rb
index 9670c93759e..abb3d3a02c3 100644
--- a/lib/gitlab/utils.rb
+++ b/lib/gitlab/utils.rb
@@ -42,5 +42,9 @@ module Gitlab
'No'
end
end
+
+ def random_string
+ Random.rand(Float::MAX.to_i).to_s(36)
+ end
end
end
diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake
index b48e4dce445..e1491f29b5e 100644
--- a/lib/tasks/gettext.rake
+++ b/lib/tasks/gettext.rake
@@ -19,4 +19,44 @@ namespace :gettext do
Rake::Task['gettext:pack'].invoke
Rake::Task['gettext:po_to_json'].invoke
end
+
+ desc 'Lint all po files in `locale/'
+ task lint: :environment do
+ FastGettext.silence_errors
+ files = Dir.glob(Rails.root.join('locale/*/gitlab.po'))
+
+ linters = files.map do |file|
+ locale = File.basename(File.dirname(file))
+
+ Gitlab::I18n::PoLinter.new(file, locale)
+ end
+
+ pot_file = Rails.root.join('locale/gitlab.pot')
+ linters.unshift(Gitlab::I18n::PoLinter.new(pot_file))
+
+ failed_linters = linters.select { |linter| linter.errors.any? }
+
+ if failed_linters.empty?
+ puts 'All PO files are valid.'
+ else
+ failed_linters.each do |linter|
+ report_errors_for_file(linter.po_path, linter.errors)
+ end
+
+ raise "Not all PO-files are valid: #{failed_linters.map(&:po_path).to_sentence}"
+ end
+ end
+
+ def report_errors_for_file(file, errors_for_file)
+ puts "Errors in `#{file}`:"
+
+ errors_for_file.each do |message_id, errors|
+ puts " #{message_id}"
+ errors.each do |error|
+ spaces = ' ' * 4
+ error = error.lines.join("#{spaces}")
+ puts "#{spaces}#{error}"
+ end
+ end
+ end
end