ai-code-review-evaluations · akshayutture-augment · Dec 31, 2013 · coderabbitai · Nov 21, 2025 · coderabbitai
diff --git a/Gemfile b/Gemfile
@@ -206,6 +206,10 @@ gem 'unicorn', require: false
 gem 'puma', require: false
 gem 'rbtrace', require: false
 
+# required for feed importing and embedding
+gem 'ruby-readability', require: false
+gem 'simple-rss', require: false
+
 # perftools only works on 1.9 atm
 group :profile do
   # travis refuses to install this, instead of fuffing, just avoid it for now

diff --git a/Gemfile_rails4.lock b/Gemfile_rails4.lock
@@ -117,6 +117,7 @@ GEM
     fspath (2.0.5)
     given_core (3.1.1)
       sorcerer (>= 0.3.7)
+    guess_html_encoding (0.0.9)
     handlebars-source (1.1.2)
     hashie (2.0.5)
     highline (1.6.20)
@@ -309,6 +310,9 @@ GEM
       rspec-mocks (~> 2.14.0)
     ruby-hmac (0.4.0)
     ruby-openid (2.3.0)
+    ruby-readability (0.5.7)
+      guess_html_encoding (>= 0.0.4)
+      nokogiri (>= 1.4.2)
     sanitize (2.0.6)
       nokogiri (>= 1.4.4)
     sass (3.2.12)
@@ -337,6 +341,7 @@ GEM
       celluloid (>= 0.14.1)
       ice_cube (~> 0.11.0)
       sidekiq (~> 2.15.0)
+    simple-rss (1.3.1)
     simplecov (0.7.1)
       multi_json (~> 1.0)
       simplecov-html (~> 0.7.1)
@@ -466,6 +471,7 @@ DEPENDENCIES
   rinku
   rspec-given
   rspec-rails
+  ruby-readability
   sanitize
   sass
   sass-rails
@@ -474,6 +480,7 @@ DEPENDENCIES
   sidekiq (= 2.15.1)
   sidekiq-failures
   sidetiq (>= 0.3.6)
+  simple-rss
   simplecov
   sinatra
   slim

diff --git a/app/assets/javascripts/embed.js b/app/assets/javascripts/embed.js
@@ -0,0 +1,27 @@
+/* global discourseUrl */
+/* global discourseEmbedUrl */
+(function() {
+
+  var comments = document.getElementById('discourse-comments'),
+      iframe = document.createElement('iframe');
+  iframe.src = discourseUrl + "embed/best?embed_url=" + encodeURIComponent(discourseEmbedUrl);
+  iframe.id = 'discourse-embed-frame';
+  iframe.width = "100%";
+  iframe.frameBorder = "0";
+  iframe.scrolling = "no";
+  comments.appendChild(iframe);
+
+
+  function postMessageReceived(e) {
+    if (!e) { return; }
+    if (discourseUrl.indexOf(e.origin) === -1) { return; }
+
+    if (e.data) {
+      if (e.data.type === 'discourse-resize' && e.data.height) {
+        iframe.height = e.data.height + "px";
+      }
+    }
+  }
+  window.addEventListener('message', postMessageReceived, false);
+
+})();
diff --git a/app/assets/stylesheets/embed.css.scss b/app/assets/stylesheets/embed.css.scss
@@ -0,0 +1,69 @@
+//= require ./vendor/normalize
+//= require ./common/foundation/base
+
+article.post { // One embedded post, separated from the next by a bottom border
+  border-bottom: 1px solid #ddd;
+
+  .post-date { // Post date, floated to the right
+    float: right;
+    color: #aaa;
+    font-size: 12px;
+    margin: 4px 4px 0 0;
+  }
+
+  .author { // Left-floated author column, 92px wide
+    padding: 20px 0;
+    width: 92px;
+    float: left;
+
+    text-align: center;
+
+    h3 {
+      text-align: center;
+      color: #4a6b82;
+      font-size: 13px;
+      margin: 0;
+    }
+  }
+
+  .cooked { // Post body; the 92px left margin clears the floated .author column
+    padding: 20px 0;
+    margin-left: 92px;
+
+    p {
+      margin: 0 0 1em 0;
+    }
+  }
+}
+
+header { // Header bar, closed off by a bottom border
+  padding: 10px 10px 20px 10px;
+
+  font-size: 18px;
+
+  border-bottom: 1px solid #ddd;
+}
+
+footer { // Footer bar; adds spacing around the logo and button it contains
+  font-size: 18px;
+
+  .logo {
+    margin-right: 10px;
+    margin-top: 10px;
+  }
+
+  a[href].button {
+    margin: 10px 0 0 10px;
+  }
+}
+
+.logo { // Logo image: floated right and capped at 30px tall
+  float: right;
+  max-height: 30px;
+}
+
+a[href].button { // Links styled as a padded, light-grey inline-block button
+  background-color: #eee;
+  padding: 5px;
+  display: inline-block;
+}
diff --git a/app/controllers/embed_controller.rb b/app/controllers/embed_controller.rb
@@ -0,0 +1,34 @@
+class EmbedController < ApplicationController
+  skip_before_filter :check_xhr
+  skip_before_filter :preload_json
+  before_filter :ensure_embeddable
+
+  layout 'embed'
+
+  def best
+    embed_url = params.require(:embed_url)
+    topic_id = TopicEmbed.topic_id_for_embed(embed_url)
+
+    if topic_id
+      @topic_view = TopicView.new(topic_id, current_user, {best: 5})
+    else
+      Jobs.enqueue(:retrieve_topic, user_id: current_user.try(:id), embed_url: embed_url)
+      render 'loading'
+    end
+
+    discourse_expires_in 1.minute
+  end
+
+  private
+
+    def ensure_embeddable
+      raise Discourse::InvalidAccess.new('embeddable host not set') if SiteSetting.embeddable_host.blank?
+      raise Discourse::InvalidAccess.new('invalid referer host') if URI(request.referer || '').host != SiteSetting.embeddable_host
+
+      response.headers['X-Frame-Options'] = "ALLOWALL"
+    rescue URI::InvalidURIError
+      raise Discourse::InvalidAccess.new('invalid referer host')
+    end
+
+
+end
diff --git a/app/jobs/regular/retrieve_topic.rb b/app/jobs/regular/retrieve_topic.rb
@@ -0,0 +1,24 @@
+require_dependency 'email/sender'
+require_dependency 'topic_retriever'
+
+module Jobs
+
+  # Asynchronously retrieve a topic from an embedded site
+  class RetrieveTopic < Jobs::Base
+
+    def execute(args)
+      raise Discourse::InvalidParameters.new(:embed_url) unless args[:embed_url].present?
+
+      user = nil
+      if args[:user_id]
+        user = User.where(id: args[:user_id]).first
+      end
+
+      TopicRetriever.new(args[:embed_url], no_throttle: user.try(:staff?)).retrieve
+    end
+
+  end
+
+end
+
+
diff --git a/app/jobs/scheduled/poll_feed.rb b/app/jobs/scheduled/poll_feed.rb
@@ -0,0 +1,41 @@
+#
+# Creates and Updates Topics based on an RSS or ATOM feed.
+#
+require 'digest/sha1'
+require_dependency 'post_creator'
+require_dependency 'post_revisor'
+require 'open-uri'
+
+module Jobs
+  class PollFeed < Jobs::Scheduled
+    recurrence { hourly }
+    sidekiq_options retry: false
+
+    def execute(args)
+      poll_feed if SiteSetting.feed_polling_enabled? &&
+                   SiteSetting.feed_polling_url.present? &&
+                   SiteSetting.embed_by_username.present?
+    end
+
+    def feed_key
+      @feed_key ||= "feed-modified:#{Digest::SHA1.hexdigest(SiteSetting.feed_polling_url)}"
+    end
+
+    def poll_feed
+      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first
+      return if user.blank?
+
+      require 'simple-rss'
+      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)
+
+      rss.items.each do |i|
+        url = i.link
+        url = i.id if url.blank? || url !~ /^https?\:\/\//
+
+        content = CGI.unescapeHTML(i.content.scrub)
+        TopicEmbed.import(user, url, i.title, content)
+      end
-    def poll_feed
-      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first
-      return if user.blank?
-
-      require 'simple-rss'
-      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)
-
-      rss.items.each do |i|
-        url = i.link
-        url = i.id if url.blank? || url !~ /^https?\:\/\//
-
-        content = CGI.unescapeHTML(i.content.scrub)
-        TopicEmbed.import(user, url, i.title, content)
-      end
+    def poll_feed
+      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first
+      return if user.blank?
+
+      require 'simple-rss'
+      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)
+
+      rss.items.each do |i|
+        url = i.link
+        url = i.id if url.blank? || url !~ /^https?\:\/\//
+
+        raw_content = i.content || i.description || i.summary || ""
+        content = CGI.unescapeHTML(raw_content.to_s.scrub)
+        TopicEmbed.import(user, url, i.title, content)
+      end
-    def poll_feed
-      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first
-      return if user.blank?
-
-      require 'simple-rss'
-      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)
-
-      rss.items.each do |i|
-        url = i.link
-        url = i.id if url.blank? || url !~ /^https?\:\/\//
-
-        content = CGI.unescapeHTML(i.content.scrub)
-        TopicEmbed.import(user, url, i.title, content)
-      end
+    def poll_feed
+      user = User.where(username_lower: SiteSetting.embed_by_username.downcase).first
+      return if user.blank?
+
+      require 'simple-rss'
+      rss = SimpleRSS.parse open(SiteSetting.feed_polling_url)
+
+      rss.items.each do |i|
+        url = i.link
+        url = i.id if url.blank? || url !~ /^https?\:\/\//
+
+        raw_content = i.content || i.description || i.summary || ""
+        content = CGI.unescapeHTML(raw_content.to_s.scrub)
+        TopicEmbed.import(user, url, i.title, content)
+      end
+    end
+
+  end
+end
diff --git a/app/models/post.rb b/app/models/post.rb
@@ -60,6 +60,10 @@ def self.types
     @types ||= Enum.new(:regular, :moderator_action)
   end
 
+  def self.cook_methods # Memoized Enum of the cook methods: :regular and :raw_html
+    @cook_methods ||= Enum.new(:regular, :raw_html)
+  end
+
   def self.find_by_detail(key, value)
     includes(:post_details).where(post_details: { key: key, value: value }).first
   end
@@ -124,6 +128,11 @@ def post_analyzer
   end
 
   def cook(*args)
+    # Posts whose cook_method is :raw_html (for example those imported via RSS) already
+    # hold HTML in raw, so return it as-is; this also skips the :after_post_cook filter.
+    return raw if cook_method == Post.cook_methods[:raw_html]
+
+    # Every other post goes through the analyzer's cook, then the plugin filter.
     Plugin::Filter.apply(:after_post_cook, self, post_analyzer.cook(*args))
   end
 

diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb
@@ -0,0 +1,82 @@
+require_dependency 'nokogiri'
+
+class TopicEmbed < ActiveRecord::Base
+  belongs_to :topic
+  belongs_to :post
+  validates_presence_of :embed_url
+  validates_presence_of :content_sha1
+
+  # Import an article from a source (RSS/Atom/Other)
+  def self.import(user, url, title, contents)
+    return unless url =~ /^https?\:\/\//
+
+    contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
-    contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
+    require 'cgi'
+    escaped_url = CGI.escapeHTML(url)
+    contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{escaped_url}'>#{escaped_url}</a>")}</small>\n"
-    contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"
+    require 'cgi'
+    escaped_url = CGI.escapeHTML(url)
+    contents << "\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{escaped_url}'>#{escaped_url}</a>")}</small>\n"
+
+    embed = TopicEmbed.where(embed_url: url).first
+    content_sha1 = Digest::SHA1.hexdigest(contents)
+    post = nil
+
+    # If there is no embed, create a topic, post and the embed.
+    if embed.blank?
+      Topic.transaction do
+        creator = PostCreator.new(user, title: title, raw: absolutize_urls(url, contents), skip_validations: true, cook_method: Post.cook_methods[:raw_html])
+        post = creator.create
+        if post.present?
+          TopicEmbed.create!(topic_id: post.topic_id,
+                             embed_url: url,
+                             content_sha1: content_sha1,
+                             post_id: post.id)
+        end
+      end
+    else
+      post = embed.post
+      # Update the topic if it changed
+      if content_sha1 != embed.content_sha1
+        revisor = PostRevisor.new(post)
+        revisor.revise!(user, absolutize_urls(url, contents), skip_validations: true, bypass_rate_limiter: true)
+        embed.update_column(:content_sha1, content_sha1)
+      end
+    end
+
+    post
+  end
+
+  def self.import_remote(user, url, opts=nil)
+    require 'ruby-readability'
+
+    opts = opts || {}
+    doc = Readability::Document.new(open(url).read,
+                                        tags: %w[div p code pre h1 h2 h3 b em i strong a img],
+                                        attributes: %w[href src])
+
+    TopicEmbed.import(user, url, opts[:title] || doc.title, doc.content)
+  end
+
+  # Convert any relative URLs to absolute. RSS is annoying for this.
+  def self.absolutize_urls(url, contents)
+    uri = URI(url)
+    prefix = "#{uri.scheme}://#{uri.host}"
+    prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443
+
+    fragment = Nokogiri::HTML.fragment(contents)
+    fragment.css('a').each do |a|
+      href = a['href']
+      if href.present? && href.start_with?('/')
+        a['href'] = "#{prefix}/#{href.sub(/^\/+/, '')}"
+      end
+    end
+    fragment.css('img').each do |a|
+      src = a['src']
+      if src.present? && src.start_with?('/')
+        a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"
+      end
+    end
+
+    fragment.to_html
+  end
+
+  def self.topic_id_for_embed(embed_url)
+    TopicEmbed.where(embed_url: embed_url).pluck(:topic_id).first
+  end
+
+end
diff --git a/app/views/embed/best.html.erb b/app/views/embed/best.html.erb
@@ -0,0 +1,31 @@
+<%# Embed view: header link to the topic, then (when there are posts) each post and a footer link. %>
+<header>
+  <%- if @topic_view.posts.present? %>
+    <%= link_to(I18n.t('embed.title'), @topic_view.topic.url, class: 'button', target: '_blank') %>
+  <%- else %>
+    <%= link_to(I18n.t('embed.start_discussion'), @topic_view.topic.url, class: 'button', target: '_blank') %>
+  <%- end %>
+
+  <%= link_to(image_tag(SiteSetting.logo_url, class: 'logo'), Discourse.base_url) %>
+</header>
+
+<%- if @topic_view.posts.present? %>
+  <%- @topic_view.posts.each do |post| %>
+    <article class='post'>
+      <%= link_to post.created_at.strftime("%e %b %Y"), post.url, class: 'post-date', target: "_blank" %>
+      <div class='author'>
+        <img src='<%= post.user.small_avatar_url %>'>
+        <h3><%= post.user.username %></h3>
+      </div>
+      <div class='cooked'><%= raw post.cooked %></div>
+      <div style='clear: both'></div>
+    </article>
+  <%- end %>
+
+  <footer>
+    <%= link_to(I18n.t('embed.continue'), @topic_view.topic.url, class: 'button', target: '_blank') %>
+    <%= link_to(image_tag(SiteSetting.logo_url, class: 'logo'), Discourse.base_url) %>
+  </footer>
+
+<% end %>
+