From 426a080dac936df2b1906f8ed7c8d81a65db1051 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Thu, 7 Nov 2024 13:33:37 +0800 Subject: [PATCH] Ignore more share links and add a new share igset for less common share links Also consolidate some existing share links ignore regexes. Sourced from at least these places: https://4232.tuxfamily.org/donate/ https://shkspr.mobi/blog/2025/08/what-about-using-relshare-url-to-expose-sharing-intents/ https://shareopenly.org/share/?url=https%3A%2F%2Fshareopenly.org%2F https://rusafromedia.ru/ https://sharethis.com/ https://demo.addtoany.com/ https://americanhiking.org/ ArchiveBot jobs: bnadesoyfxyj10mfkhe7ek8ku 1z311zcpt9i89jc4uxqnyoukr chnvnbcrhyxyk348km1as411o 81zlxh60qx9keig37g7ti0t2q 2l5mpaegrf01dyqn8atxu3n9h 6kiqbujx4crwxc50sbjr1s8dc 6el9mioyesalnsnz2rb7pl8gq aauz3194fiq12o69kvy6i3vkm 1ux3zrf046lpftiztgk5zsxnw --- db/ignore_patterns/global.json | 29 ++++---- db/ignore_patterns/share.json | 128 +++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 16 deletions(-) create mode 100644 db/ignore_patterns/share.json diff --git a/db/ignore_patterns/global.json b/db/ignore_patterns/global.json index b7e9bfcf..da26feb8 100644 --- a/db/ignore_patterns/global.json +++ b/db/ignore_patterns/global.json @@ -125,27 +125,28 @@ "^https?://[^/]+\\.rastream\\.com(:\\d+)?/", "^https?://audiots\\.scdn\\.arkena\\.com/", "^https?://(www|draft)\\.blogger\\.com/(navbar\\.g|post-edit\\.g|delete-comment\\.g|comment-iframe\\.g|share-post\\.g|email-post\\.g|blog-this\\.g|delete-backlink\\.g|rearrange|blog_this\\.pyra)\\?", - "^https?://[^/]*tumblr\\.com/(impixu\\?|share(/link/?)?\\?|reblog/)", + "^https?://[^/]*tumblr\\.com/(impixu\\?|share(/link/?)?\\?|widgets/share/tool|reblog/)", "^https?://plus\\.google\\.com/share\\?", "^https?://(apis|plusone)\\.google\\.com/_/\\+1/", "^https?://(ssl\\.|www\\.)?reddit\\.com/(login\\?dest=|submit\\?|static/button/button)", - "^https?://digg\\.com/submit\\?", - 
"^https?://(www\\.)?facebook\\.com/(plugins/like(box)?\\.php|sharer/sharer\\.php|sharer?\\.php|dialog/(feed|share))\\?", + "^https?://(www\\.)?digg\\.com/submit\\?", + "^https?://((www|m)\\.)?facebook\\.com/(plugins/(like(box)?|share_button)\\.php|(sharer/)?sharer\\.php|dialog/(feed|share|send))\\?", "^https?://www\\.facebook\\.com/captcha/", - "^https?://(www\\.)?twitter\\.com/(share\\?|intent/((re)?tweet|favorite)|home/?\\?status=|\\?status=)", + "^https?://(www\\.)?(twitter|x)\\.com/(share\\?|intent/((re)?(tweet|post)|favorite)|home/?\\?status=|\\?status=)", "^https?://platform\\d?\\.twitter\\.com/widgets/tweet_button.html\\?", - "^https?://www\\.newsvine\\.com/_wine/save\\?", - "^https?://www\\.netvibes\\.com/subscribe\\.php\\?", + "^https?://www\\.newsvine\\.com/(_wine/|_tools/seed&)save\\?", + "^https?://www\\.netvibes\\.com/(share|subscribe\\.php)\\?", "^https?://add\\.my\\.yahoo\\.com/(rss|content)\\?", "^https?://www\\.addtoany\\.com/(add_to/|share_save\\?)", "^https?://www\\.addthis\\.com/bookmark\\.php\\?", "^https?://(www\\.)?pinterest\\.com/pin/create/", - "^https?://www\\.linkedin\\.com/(cws/share|shareArticle)\\?", - "^https?://(www\\.)?stumbleupon\\.com/(submit\\?|badge/embed/)", + "^https?://(www\\.)?linkedin\\.com/(cws/share\\?|shareArticle/?\\?|sharing/share-offsite/\\?|feed/\\?shareActive=true&shareUrl=)", + "^https?://(www\\.)?(telegram|t)\\.me/share/url\\?", + "^https?://(www\\.)?stumbleupon\\.com/((submit|badge)\\?|badge/embed/)", "^https?://csp\\.cyworld\\.com/bi/bi_recommend_pop\\.php\\?", "^https://share\\.flipboard\\.com/bookmarklet/popout\\?", "^https?://flattr.com/submit/auto\\?", - "^https?://(www\\.)?myspace\\.com/Modules/PostTo/", + "^https?://(www\\.)?myspace\\.com/(Modules/PostTo/|post\\?)", "^https?://www\\.google\\.com/bookmarks/mark\\?", "^http://myweb2\\.search\\.yahoo\\.com/myresults/bookmarklet\\?", "^http://vuible\\.com/pins-settings/", @@ -154,15 +155,14 @@ 
"^http://www\\.blinklist\\.com/index\\.php\\?Action=Blink/addblink\\.php", "^http://sphinn\\.com/index\\.php\\?c=post&m=submit&", "^http://posterous\\.com/share\\?", - "^http://del\\.icio\\.us/post\\?", - "^https?://delicious\\.com/(save|post)\\?", + "^https?://(del\\.icio\\.us|delicious\\.com)/(save|post)\\?", "^https?://(www\\.)?friendfeed\\.com/share\\?", - "^https?://(www\\.)?xing\\.com/(app/user\\?op=share|social_plugins/share\\?)", + "^https?://(www\\.)?xing\\.com/(app/user\\?op=share|social_plugins/share\\?|spi/shares/new\\?)", "^http://iwiw\\.hu/pages/share/share\\.jsp\\?", "^http://memori(\\.qip)?\\.ru/link/\\?", "^http://wow\\.ya\\.ru/posts_(add|share)_link\\.xml\\?", "^https?://connect\\.mail\\.ru/share\\?", - "^http://zakladki\\.yandex\\.ru/newlink\\.xml\\?", + "^https?://zakladki\\.yandex\\.ru/(newlink\\.xml|userarea/links/addfromfav\\.asp)\\?", "^https?://(vkontakte\\.ru|vk\\.com)/share\\.php\\?", "^https?://www\\.odnoklassniki\\.ru/dk\\?st\\.cmd=addShare", "^https?://www\\.google\\.com/(reader/link\\?|buzz/post\\?)", @@ -198,7 +198,6 @@ "^https?://media\\.opb\\.org/clips/embed/.+\\.js$", "^https?://[^.]+\\.pinterest\\.[^/]+/join/", "\\?wordfence_(logHuman|lh)=1", - "^https?://[^/]*tumblr\\.com/widgets/share/tool", "amp;amp;", "^https?://accounts\\.google\\.com/o/oauth2/auth\\?", "^https?://www\\.facebook\\.com/dialog/oauth\\?", @@ -209,8 +208,6 @@ "^https?://www\\.testtesttest\\.com/", "^https?://(www\\.cgzxb\\.com|imgnet\\.com|216\\.218\\.185\\.162)/", "^https?://assets\\.squarespace\\.com/universal/scripts-compressed/src/main/webapp/universal/", - "^https?://www\\.facebook\\.com/dialog/send\\?", - "^https?://linkedin\\.com/shareArticle\\?", "^https?://(line\\.me/R/msg/text/|lineit\\.line\\.me/share/ui|social-plugins\\.line\\.me/lineit/share)\\?", "^https?://((videos|360)\\.littlstar\\.com|ls-video-masters\\.s3-accelerate\\.amazonaws\\.com)/.*\\.(mp4|mov|mkv|webm|avi|yuv|y4m)$", "^https?://platform\\.iteratehq\\.com/[0-9a-f-]+\\.png$", diff --git 
a/db/ignore_patterns/share.json b/db/ignore_patterns/share.json new file mode 100644 index 00000000..25b8ef84 --- /dev/null +++ b/db/ignore_patterns/share.json @@ -0,0 +1,128 @@ +{ + "name": "share", + "patterns": [ + "^https?://((web|www|join)\\.)?skype\\.com/(invite|share)\\?", + "^https?://((www|ww\\d+)\\.)zabox\\.net/submit\\.php\\?", + "^https?://(www\\.)?100zakladok\\.ru/save/\\?", + "^https?://(www\\.)?addtoany\\.com/share#", + "^https?://(www\\.)?bebo\\.com/c/share\\?", + "^https?://(www\\.)?blogengage\\.com/submit\\.php\\?", + "^https?://(www\\.)?bloglines\\.com/sub/http", + "^https?://(www\\.)?blogmarks\\.net/my/new\\.php\\?", + "^https?://(www\\.)?bobrdobr\\.ru/addext\\.html\\?", + "^https?://(www\\.)?bonzobox\\.com/toolbar/add\\?", + "^https?://(www\\.)?box\\.(com|net)/api/1\\.0/import\\?", + "^https?://(www\\.)?bsky\\.app/intent/compose\\?", + "^https?://(www\\.)?buffer\\.com/add\\?", + "^https?://(www\\.)?bufferapp\\.com/add\\?", + "^https?://(www\\.)?connect\\.ok\\.ru/dk\\?", + "^https?://(www\\.)?cosocial\\.ca/share\\?", + "^https?://(www\\.)?current\\.com/clipper\\.html?\\?", + "^https?://(www\\.)?designbump\\.com/submit\\?", + "^https?://(www\\.)?designfloat\\.com/submit\\.php\\?", + "^https?://(www\\.)?diigo\\.com/post\\?", + "^https?://(www\\.)?douban\\.com/recommend/\\?", + "^https?://(www\\.)?dzone\\.com(:443)?/links/add\\.html\\?", + "^https?://(www\\.)?ekudos\\.nl/artikel/nieuw\\?", + "^https?://(www\\.)?evernote\\.com/clip\\.action\\?", + "^https?://(www\\.)?faqpal\\.com/submit\\?", + "^https?://(www\\.)?fark\\.com/submit\\?", + "^https?://(www\\.)?fintel\\.io/submit\\?", + "^https?://(www\\.)?foursquare\\.com/intent/venue/", + "^https?://(www\\.)?furl\\.net/storeIt\\.jsp\\?", + "^https?://(www\\.)?fwisp\\.com/submit\\?", + "^https?://(www\\.)?gab\\.com/compose\\?", + "^https?://(www\\.)?getpocket\\.com/(login|edit|save)\\?", + "^https?://(www\\.)?gettr\\.com/share\\?", + "^https?://(www\\.)?globalgrind\\.com/submission/submit\\.aspx\\?", + 
"^https?://(www\\.)?goodreads\\.com/quotes\\?", + "^https?://(www\\.)?houzz\\.com/imageClipperUpload\\?", + "^https?://(www\\.)?hyves\\.nl/profilemanage/add/tips/\\?", + "^https?://(www\\.)?instapaper\\.com/(edit|hello2)\\?", + "^https?://(www\\.)?iorbix\\.com/m-share\\?", + "^https?://(www\\.)?izeby\\.com/submit\\.php\\?", + "^https?://(www\\.)?jumptags\\.com/add/\\?", + "^https?://(www\\.)?kaevur\\.com/submit\\.php\\?", + "^https?://(www\\.)?kooapp\\.com/create\\?", + "^https?://(www\\.)?lemmy\\.world/create_post\\?", + "^https?://(www\\.)?livejournal\\.com/update\\.bml\\?", + "^https?://(www\\.)?mastodon\\.social/share\\?", + "^https?://(www\\.)?meneame\\.net/submit(\\.php)?\\?", + "^https?://(www\\.)?micro\\.blog/share\\?", + "^https?://(www\\.)?mister-wong\\.com/addurl/\\?", + "^https?://(www\\.)?mix\\.com/add\\?", + "^https?://(www\\.)?mixx\\.com/submit\\?", + "^https?://(www\\.)?moemesto\\.ru/post\\.php\\?", + "^https?://(www\\.)?mstdn\\.social/share\\?", + "^https?://(www\\.)?mylinkvault\\.com/link-page\\.php\\?", + "^https?://(www\\.)?n4g\\.com/(tips(\\.aspx)?|submit/story/)\\?", + "^https?://(www\\.)?nbcnews\\.com/\\?u=", + "^https?://(www\\.)?netvibes\\.com/share\\?", + "^https?://(www\\.)?netvouz\\.com/action/submitBookmark\\?", + "^https?://(www\\.)?newsgator\\.com/ngs/subscriber/subext\\.aspx\\?", + "^https?://(www\\.)?newsvine\\.com/_tools/seed&save\\?", + "^https?://(www\\.)?nextdoor\\.com/sharekit/\\?", + "^https?://(www\\.)?nujij\\.nl/jij\\.lynkx\\?", + "^https?://(www\\.)?parler\\.com/new-post\\?", + "^https?://(www\\.)?pfbuzz\\.com/submit\\?", + "^https?://(www\\.)?pinboard\\.in/add\\?", + "^https?://(www\\.)?ping\\.fm/ref/\\?", + "^https?://(www\\.)?plaxo\\.com/\\?share_link=", + "^https?://(www\\.)?plurk\\.com/(.*[?&]qualifier=shares($|&)|m\\?)", + "^https?://(www\\.)?printfriendly\\.com/print\\?", + "^https?://(www\\.)?propeller\\.com/submit/\\?", + "^https?://(www\\.)?pusha\\.se/posta\\?", + "^https?://(www\\.)?refind\\.com/?\\?url=", + 
"^https?://(www\\.)?scriptandstyle\\.com/submit\\?", + "^https?://(www\\.)?shareopenly\\.org/share/\\?", + "^https?://(www\\.)?slashdot\\.org/bookmark\\.pl\\?", + "^https?://(www\\.)?snapchat\\.com/scan\\?", + "^https?://(www\\.)?springpadit\\.com/clip\\.action\\?", + "^https?://(www\\.)?squidoo\\.com(:443)?/lensmaster/bookmark\\?", + "^https?://(www\\.)?strands\\.com/tools/share/webpage\\?", + "^https?://(www\\.)?stumpedia\\.com/submit\\?", + "^https?://(www\\.)?surfingbird\\.ru/share\\?", + "^https?://(www\\.)?thewebblend\\.com/submit\\?", + "^https?://(www\\.)?threads\\.net/(login|intent/post)\\?", + "^https?://(www\\.)?tipd\\.com/submit\\.php\\?", + "^https?://(www\\.)?trello\\.com/add-card\\?", + "^https?://(www\\.)?twittley\\.com/submit/\\?", + "^https?://(www\\.)?viadeo\\.com/(shareit/share/\\?|\\?url=)", + "^https?://(www\\.)?virb\\.com/share\\?", + "^https?://(www\\.)?virgilio\\.it/?\\?url=", + "^https?://(www\\.)?wordpress\\.com/(wp-admin/)?press-this\\.php\\?", + "^https?://(www\\.)?ww38\\.springpadit\\.com/clip\\.action\\?", + "^https?://(www\\.)?wykop\\.pl/dodaj\\?", + "^https?://(www\\.)?xerpi\\.com/block/add_link_from_extension\\?", + "^https?://(www\\.)?yammer\\.com/messages/new\\?", + "^https?://(www\\.)?yandex\\.ru/chat\\?", + "^https?://(www\\.)?yummly\\.com/urb/verify\\?", + "^https?://(www\\.)amazon.com/(gp/)?wishlist/add\\?", + "^https?://app\\.clouthub\\.com/share\\?", + "^https?://bookmark\\.fc2\\.com/user/post\\?", + "^https?://bookmarks\\.ning\\.com/addItem\\.php\\?", + "^https?://bookmarks\\.yahoo\\.co\\.jp/bookmarklet/showpopup\\?", + "^https?://buzz\\.yahoo\\.com/submit/\\?", + "^https?://clip\\.livedoor\\.com/clip/add\\?", + "^https?://compose\\.mail\\.yahoo\\.com/(mrd/)?\\?", + "^https?://connect\\.ok\\.ru/offer\\?", + "^https?://e\\.my\\.yahoo\\.co\\.jp/config/jp_promo_content\\?", + "^https?://fusion\\.google\\.com/add\\?", + "^https?://mail\\.live\\.com/\\?rru=compose\\?", + "^https?://oknotizie\\.virgilio\\.it/post\\?", + 
"^https?://outlook\\.(office|live)\\.com/mail/deeplink/compose\\?", + "^https?://promote\\.orkut\\.com/preview\\?", + "^https?://pushtokindle\\.fivefilters\\.org/send\\.php\\?", + "^https?://reader\\.livedoor\\.com/subscribe/http", + "^https?://sapp\\.ir/share/url\\?", + "^https?://share\\.diasporafoundation\\.org/\\?", + "^https?://share\\.naver\\.com/web/shareView\\.nhn\\?", + "^https?://sns\\.qzone\\.qq\\.com/cgi-bin/qzshare/cgi_qzshare_onekey\\?", + "^https?://story\\.kakao\\.com/share\\?", + "^https?://teams\\.microsoft\\.com/(l/entity/com\\.microsoft\\.teamspace\\.tab\\.url\\.)?share\\?", + "^https?://viadeo\\.journaldunet\\.com/shareit/share/\\?", + "^https?://webmail\\.aol\\.com/Mail/ComposeMessage\\.aspx\\?", + "^https?://widget\\.renren\\.com/dialog/share\\?", + ], + "type": "ignore_patterns" +}