-
Notifications
You must be signed in to change notification settings - Fork 0
Enhance embed URL handling and validation system #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: embed-url-handling-pre
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| /* global discourseUrl */ | ||
| /* global discourseEmbedUrl */ | ||
| (function() { | ||
|
|
||
| var comments = document.getElementById('discourse-comments'), | ||
| iframe = document.createElement('iframe'); | ||
| iframe.src = discourseUrl + "embed/best?embed_url=" + encodeURIComponent(discourseEmbedUrl); | ||
| iframe.id = 'discourse-embed-frame'; | ||
| iframe.width = "100%"; | ||
| iframe.frameBorder = "0"; | ||
| iframe.scrolling = "no"; | ||
| comments.appendChild(iframe); | ||
|
|
||
|
|
||
| function postMessageReceived(e) { | ||
| if (!e) { return; } | ||
| if (discourseUrl.indexOf(e.origin) === -1) { return; } | ||
|
|
||
| if (e.data) { | ||
| if (e.data.type === 'discourse-resize' && e.data.height) { | ||
| iframe.height = e.data.height + "px"; | ||
| } | ||
| } | ||
| } | ||
| window.addEventListener('message', postMessageReceived, false); | ||
|
|
||
| })(); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| //= require ./vendor/normalize | ||
| //= require ./common/foundation/base | ||
|
|
||
| article.post { | ||
| border-bottom: 1px solid #ddd; | ||
|
|
||
| .post-date { | ||
| float: right; | ||
| color: #aaa; | ||
| font-size: 12px; | ||
| margin: 4px 4px 0 0; | ||
| } | ||
|
|
||
| .author { | ||
| padding: 20px 0; | ||
| width: 92px; | ||
| float: left; | ||
|
|
||
| text-align: center; | ||
|
|
||
| h3 { | ||
| text-align: center; | ||
| color: #4a6b82; | ||
| font-size: 13px; | ||
| margin: 0; | ||
| } | ||
| } | ||
|
|
||
| .cooked { | ||
| padding: 20px 0; | ||
| margin-left: 92px; | ||
|
|
||
| p { | ||
| margin: 0 0 1em 0; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| header { | ||
| padding: 10px 10px 20px 10px; | ||
|
|
||
| font-size: 18px; | ||
|
|
||
| border-bottom: 1px solid #ddd; | ||
| } | ||
|
|
||
| footer { | ||
| font-size: 18px; | ||
|
|
||
| .logo { | ||
| margin-right: 10px; | ||
| margin-top: 10px; | ||
| } | ||
|
|
||
| a[href].button { | ||
| margin: 10px 0 0 10px; | ||
| } | ||
| } | ||
|
|
||
| .logo { | ||
| float: right; | ||
| max-height: 30px; | ||
| } | ||
|
|
||
| a[href].button { | ||
| background-color: #eee; | ||
| padding: 5px; | ||
| display: inline-block; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| class EmbedController < ApplicationController | ||
| skip_before_filter :check_xhr | ||
| skip_before_filter :preload_json | ||
| before_filter :ensure_embeddable | ||
|
|
||
| layout 'embed' | ||
|
|
||
| def best | ||
| embed_url = params.require(:embed_url) | ||
| topic_id = TopicEmbed.topic_id_for_embed(embed_url) | ||
|
|
||
| if topic_id | ||
| @topic_view = TopicView.new(topic_id, current_user, {best: 5}) | ||
| else | ||
| Jobs.enqueue(:retrieve_topic, user_id: current_user.try(:id), embed_url: embed_url) | ||
| render 'loading' | ||
| end | ||
|
|
||
| discourse_expires_in 1.minute | ||
| end | ||
|
|
||
| private | ||
|
|
||
| def ensure_embeddable | ||
| raise Discourse::InvalidAccess.new('embeddable host not set') if SiteSetting.embeddable_host.blank? | ||
| raise Discourse::InvalidAccess.new('invalid referer host') if URI(request.referer || '').host != SiteSetting.embeddable_host | ||
|
|
||
| response.headers['X-Frame-Options'] = "ALLOWALL" | ||
| rescue URI::InvalidURIError | ||
| raise Discourse::InvalidAccess.new('invalid referer host') | ||
| end | ||
|
|
||
|
|
||
| end | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| require_dependency 'email/sender' | ||
| require_dependency 'topic_retriever' | ||
|
|
||
| module Jobs | ||
|
|
||
| # Asynchronously retrieve a topic from an embedded site | ||
| class RetrieveTopic < Jobs::Base | ||
|
|
||
| def execute(args) | ||
| raise Discourse::InvalidParameters.new(:embed_url) unless args[:embed_url].present? | ||
|
|
||
| user = nil | ||
| if args[:user_id] | ||
| user = User.where(id: args[:user_id]).first | ||
| end | ||
|
|
||
| TopicRetriever.new(args[:embed_url], no_throttle: user.try(:staff?)).retrieve | ||
| end | ||
|
|
||
| end | ||
|
|
||
| end | ||
|
|
||
|
|
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,41 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # Creates and Updates Topics based on an RSS or ATOM feed. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| # | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| require 'digest/sha1' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| require_dependency 'post_creator' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| require_dependency 'post_revisor' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| require 'open-uri' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| module Jobs | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| class PollFeed < Jobs::Scheduled | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| recurrence { hourly } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| sidekiq_options retry: false | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| def execute(args) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| poll_feed if SiteSetting.feed_polling_enabled? && | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| SiteSetting.feed_polling_url.present? && | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| SiteSetting.embed_by_username.present? | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| def feed_key | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| @feed_key ||= "feed-modified:#{Digest::SHA1.hexdigest(SiteSetting.feed_polling_url)}" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| def poll_feed | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return if user.blank? | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| require 'simple-rss' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| rss = SimpleRSS.parse open(SiteSetting.feed_polling_url) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| rss.items.each do |i| | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| url = i.link | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| url = i.id if url.blank? || url !~ /^https?\:\/\// | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| content = CGI.unescapeHTML(i.content.scrub) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| TopicEmbed.import(user, url, i.title, content) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+24
to
+37
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Guard against missing Many feeds don’t populate a Consider falling back to - rss.items.each do |i|
- url = i.link
- url = i.id if url.blank? || url !~ /^https?\:\/\//
-
- content = CGI.unescapeHTML(i.content.scrub)
- TopicEmbed.import(user, url, i.title, content)
- end
+ rss.items.each do |i|
+ url = i.link
+ url = i.id if url.blank? || url !~ /^https?\:\/\//
+
+ raw_content = i.content || i.description || i.summary || ""
+ content = CGI.unescapeHTML(raw_content.to_s.scrub)
+ TopicEmbed.import(user, url, i.title, content)
+ end(Separately, if you expect untrusted or variable 📝 Committable suggestion
Suggested change
🧰 Tools🪛 Brakeman (7.1.1)[medium] 29-29: Model attribute used in file name (File Access) 🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,82 @@ | ||||||||||
| require_dependency 'nokogiri' | ||||||||||
|
|
||||||||||
| class TopicEmbed < ActiveRecord::Base | ||||||||||
| belongs_to :topic | ||||||||||
| belongs_to :post | ||||||||||
| validates_presence_of :embed_url | ||||||||||
| validates_presence_of :content_sha1 | ||||||||||
|
|
||||||||||
| # Import an article from a source (RSS/Atom/Other) | ||||||||||
| def self.import(user, url, title, contents) | ||||||||||
| return unless url =~ /^https?\:\/\// | ||||||||||
|
|
||||||||||
| contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n" | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Critical: Potential XSS vulnerability in URL interpolation. The URL is directly interpolated into HTML without escaping, which could allow XSS attacks if the URL contains malicious characters or JavaScript. Even though the content uses Apply proper HTML escaping: - contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
+ require 'cgi'
+ escaped_url = CGI.escapeHTML(url)
+ contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{escaped_url}'>#{escaped_url}</a>")}</small>\n"📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||
|
|
||||||||||
| embed = TopicEmbed.where(embed_url: url).first | ||||||||||
| content_sha1 = Digest::SHA1.hexdigest(contents) | ||||||||||
| post = nil | ||||||||||
|
|
||||||||||
| # If there is no embed, create a topic, post and the embed. | ||||||||||
| if embed.blank? | ||||||||||
| Topic.transaction do | ||||||||||
| creator = PostCreator.new(user, title: title, raw: absolutize_urls(url, contents), skip_validations: true, cook_method: Post.cook_methods[:raw_html]) | ||||||||||
| post = creator.create | ||||||||||
| if post.present? | ||||||||||
| TopicEmbed.create!(topic_id: post.topic_id, | ||||||||||
| embed_url: url, | ||||||||||
| content_sha1: content_sha1, | ||||||||||
| post_id: post.id) | ||||||||||
| end | ||||||||||
| end | ||||||||||
| else | ||||||||||
| post = embed.post | ||||||||||
| # Update the topic if it changed | ||||||||||
| if content_sha1 != embed.content_sha1 | ||||||||||
| revisor = PostRevisor.new(post) | ||||||||||
| revisor.revise!(user, absolutize_urls(url, contents), skip_validations: true, bypass_rate_limiter: true) | ||||||||||
| embed.update_column(:content_sha1, content_sha1) | ||||||||||
| end | ||||||||||
| end | ||||||||||
|
|
||||||||||
| post | ||||||||||
| end | ||||||||||
|
|
||||||||||
| def self.import_remote(user, url, opts=nil) | ||||||||||
| require 'ruby-readability' | ||||||||||
|
|
||||||||||
| opts = opts || {} | ||||||||||
| doc = Readability::Document.new(open(url).read, | ||||||||||
| tags: %w[div p code pre h1 h2 h3 b em i strong a img], | ||||||||||
| attributes: %w[href src]) | ||||||||||
|
|
||||||||||
| TopicEmbed.import(user, url, opts[:title] || doc.title, doc.content) | ||||||||||
| end | ||||||||||
|
Comment on lines
+44
to
+53
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Critical: SSRF vulnerability using Using Ruby's
This allows attackers to:
Replace with a safe HTTP client and validate the URL scheme: def self.import_remote(user, url, opts=nil)
require 'ruby-readability'
+ require 'open-uri'
+
+ # Validate URL scheme
+ uri = URI.parse(url)
+ unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
+ raise ArgumentError, 'Only HTTP(S) URLs are allowed'
+ end
opts = opts || {}
- doc = Readability::Document.new(open(url).read,
+ # Use OpenURI with safe options or better, use Net::HTTP directly
+ content = URI.open(url, redirect: true, read_timeout: 10).read
+ doc = Readability::Document.new(content,
tags: %w[div p code pre h1 h2 h3 b em i strong a img],
attributes: %w[href src])
TopicEmbed.import(user, url, opts[:title] || doc.title, doc.content)
endBetter yet, use a dedicated HTTP client library with proper timeouts and safeguards: require 'net/http'
uri = URI.parse(url)
raise ArgumentError, 'Only HTTP(S) URLs are allowed' unless uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
# Add additional checks to prevent access to internal networks
if uri.host.match?(/^(127\.|10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.)/)
raise ArgumentError, 'Access to internal networks not allowed'
end
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = (uri.scheme == 'https')
http.open_timeout = 5
http.read_timeout = 10
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
content = response.body🤖 Prompt for AI Agents |
||||||||||
|
|
||||||||||
| # Convert any relative URLs to absolute. RSS is annoying for this. | ||||||||||
| def self.absolutize_urls(url, contents) | ||||||||||
| uri = URI(url) | ||||||||||
| prefix = "#{uri.scheme}://#{uri.host}" | ||||||||||
| prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443 | ||||||||||
|
|
||||||||||
| fragment = Nokogiri::HTML.fragment(contents) | ||||||||||
| fragment.css('a').each do |a| | ||||||||||
| href = a['href'] | ||||||||||
| if href.present? && href.start_with?('/') | ||||||||||
| a['href'] = "#{prefix}/#{href.sub(/^\/+/, '')}" | ||||||||||
| end | ||||||||||
| end | ||||||||||
| fragment.css('img').each do |a| | ||||||||||
| src = a['src'] | ||||||||||
| if src.present? && src.start_with?('/') | ||||||||||
| a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}" | ||||||||||
| end | ||||||||||
| end | ||||||||||
|
|
||||||||||
| fragment.to_html | ||||||||||
| end | ||||||||||
|
|
||||||||||
| def self.topic_id_for_embed(embed_url) | ||||||||||
| TopicEmbed.where(embed_url: embed_url).pluck(:topic_id).first | ||||||||||
| end | ||||||||||
|
|
||||||||||
| end | ||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| <header> | ||
| <%- if @topic_view.posts.present? %> | ||
| <%= link_to(I18n.t('embed.title'), @topic_view.topic.url, class: 'button', target: '_blank') %> | ||
| <%- else %> | ||
| <%= link_to(I18n.t('embed.start_discussion'), @topic_view.topic.url, class: 'button', target: '_blank') %> | ||
| <%- end if %> | ||
|
Comment on lines
+2
to
+6
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix invalid ERB block terminator (
- <%- if @topic_view.posts.present? %>
- <%= link_to(I18n.t('embed.title'), @topic_view.topic.url, class: 'button', target: '_blank') %>
- <%- else %>
- <%= link_to(I18n.t('embed.start_discussion'), @topic_view.topic.url, class: 'button', target: '_blank') %>
- <%- end if %>
+ <%- if @topic_view.posts.present? %>
+ <%= link_to(I18n.t('embed.title'), @topic_view.topic.url, class: 'button', target: '_blank') %>
+ <%- else %>
+ <%= link_to(I18n.t('embed.start_discussion'), @topic_view.topic.url, class: 'button', target: '_blank') %>
+ <%- end %>(Optionally, you might also add a short 🤖 Prompt for AI Agents |
||
|
|
||
| <%= link_to(image_tag(SiteSetting.logo_url, class: 'logo'), Discourse.base_url) %> | ||
| </header> | ||
|
|
||
| <%- if @topic_view.posts.present? %> | ||
| <%- @topic_view.posts.each do |post| %> | ||
| <article class='post'> | ||
| <%= link_to post.created_at.strftime("%e %b %Y"), post.url, class: 'post-date', target: "_blank" %> | ||
| <div class='author'> | ||
| <img src='<%= post.user.small_avatar_url %>'> | ||
| <h3><%= post.user.username %></h3> | ||
| </div> | ||
| <div class='cooked'><%= raw post.cooked %></div> | ||
| <div style='clear: both'></div> | ||
| </article> | ||
| <%- end %> | ||
|
|
||
| <footer> | ||
| <%= link_to(I18n.t('embed.continue'), @topic_view.topic.url, class: 'button', target: '_blank') %> | ||
| <%= link_to(image_tag(SiteSetting.logo_url, class: 'logo'), Discourse.base_url) %> | ||
| </footer> | ||
|
|
||
| <% end %> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion | 🟠 Major
Validate
embed_urlparameter format.The
embed_urlparameter is required but not validated for URL format. This could allow malformed or non-HTTP(S) URLs to be processed, potentially causing issues in downstream logic.Consider adding URL format validation:
🤖 Prompt for AI Agents