diff options
Diffstat (limited to 'lib/gitlab/github_import/markdown_text.rb')
-rw-r--r-- | lib/gitlab/github_import/markdown_text.rb | 50 |
1 files changed, 21 insertions, 29 deletions
diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index bf2856bc77f..2424b3e8c1f 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -8,23 +8,12 @@ module Gitlab class MarkdownText include Gitlab::EncodingHelper - ISSUE_REF_MATCHER = '%{github_url}/%{import_source}/issues' - PULL_REF_MATCHER = '%{github_url}/%{import_source}/pull' - - MEDIA_TYPES = %w[gif jpeg jpg mov mp4 png svg webm].freeze - DOC_TYPES = %w[ - csv docx fodg fodp fods fodt gz log md odf odg odp ods - odt pdf pptx tgz txt xls xlsx zip - ].freeze - ALL_TYPES = (MEDIA_TYPES + DOC_TYPES).freeze - # On github.com we have base url for docs and CDN url for media. # On github EE as far as we can know there is no CDN urls and media is placed on base url. - # To no escape the escaping symbol we use single quotes instead of double with interpolation. - # rubocop:disable Style/StringConcatenation - CDN_URL_MATCHER = '(!\[.+\]\(%{github_media_cdn}/\d+/(\w|-)+\.(' + MEDIA_TYPES.join('|') + ')\))' - BASE_URL_MATCHER = '(\[.+\]\(%{github_url}/.+/.+/files/\d+/.+\.(' + ALL_TYPES.join('|') + ')\))' - # rubocop:enable Style/StringConcatenation + GITHUB_MEDIA_CDN = 'https://user-images.githubusercontent.com' + + ISSUE_REF_MATCHER = '%{github_url}/%{import_source}/issues' + PULL_REF_MATCHER = '%{github_url}/%{import_source}/pull' class << self def format(*args) @@ -42,20 +31,6 @@ module Gitlab .gsub(pull_ref_matcher, url_helpers.project_merge_requests_url(project)) end - def fetch_attachment_urls(text) - cdn_url_matcher = CDN_URL_MATCHER % { github_media_cdn: Regexp.escape(github_media_cdn) } - doc_url_matcher = BASE_URL_MATCHER % { github_url: Regexp.escape(github_url) } - - text.scan(Regexp.new(cdn_url_matcher)).map(&:first) + - text.scan(Regexp.new(doc_url_matcher)).map(&:first) - end - - private - - def github_media_cdn - 'https://user-images.githubusercontent.com' - end - # Returns github domain without slash in the end def github_url oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} @@ -63,6 +38,23 @@ module Gitlab url = url.chop if url.end_with?('/') url end + + def fetch_attachments(text) + attachments = [] + doc = CommonMarker.render_doc(text) + + doc.walk do |node| + attachment = extract_attachment(node) + attachments << attachment if attachment + end + attachments + end + + private + + def extract_attachment(node) + ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node) + end end # text - The Markdown text as a String. |