Skip to content

Commit decf19d

Browse files
committed
update generate_known_tags.R to accommodate updated MDN sites
1 parent 3419e2f commit decf19d

File tree

5 files changed

+102
-55
lines changed

5 files changed

+102
-55
lines changed

R/known_tags.R

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ known_tags <- c(
2727
"code", # html
2828
"col", # html
2929
"colgroup", # html
30-
"color-profile", # svg
31-
"command", #
30+
"color-profile", # deprecated
31+
"command", # deprecated
3232
"data", # html
3333
"datalist", # html
3434
"dd", # html
@@ -45,7 +45,7 @@ known_tags <- c(
4545
"ellipse", # svg
4646
"em", # html
4747
"embed", # html
48-
"eventsource", #
48+
"eventsource", # deprecated
4949
"feBlend", # svg
5050
"feColorMatrix", # svg
5151
"feComponentTransfer",# svg
@@ -89,7 +89,7 @@ known_tags <- c(
8989
"hatchpath", # svg
9090
"head", # html
9191
"header", # html
92-
"hgroup", # html
92+
"hgroup", # deprecated
9393
"hr", # html
9494
"html", # html
9595
"i", # html
@@ -99,7 +99,7 @@ known_tags <- c(
9999
"input", # html
100100
"ins", # html
101101
"kbd", # html
102-
"keygen", #
102+
"keygen", # deprecated
103103
"label", # html
104104
"legend", # html
105105
"li", # html
@@ -111,6 +111,7 @@ known_tags <- c(
111111
"mark", # html
112112
"marker", # svg
113113
"mask", # svg
114+
"math", # html
114115
"menu", # html
115116
"meta", # html
116117
"metadata", # svg
@@ -130,15 +131,16 @@ known_tags <- c(
130131
"picture", # html
131132
"polygon", # svg
132133
"polyline", # svg
134+
"portal", # html
133135
"pre", # html
134136
"progress", # html
135137
"q", # html
136138
"radialGradient", # svg
137-
"rb", # html
139+
"rb", # deprecated
138140
"rect", # svg
139141
"rp", # html
140142
"rt", # html
141-
"rtc", # html
143+
"rtc", # deprecated
142144
"ruby", # html
143145
"s", # html
144146
"samp", # html
@@ -148,7 +150,7 @@ known_tags <- c(
148150
"set", # svg
149151
"slot", # html
150152
"small", # html
151-
"solidcolor", # svg
153+
"solidcolor", # deprecated
152154
"source", # html
153155
"span", # html
154156
"stop", # svg
@@ -157,7 +159,7 @@ known_tags <- c(
157159
"sub", # html
158160
"summary", # html
159161
"sup", # html
160-
"svg", # svg
162+
"svg", # html svg
161163
"switch", # svg
162164
"symbol", # svg
163165
"table", # html

scripts/generate_known_tags.R

Lines changed: 89 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,47 +3,78 @@
33
## This script web scrapes two Mozilla websites for HTML and SVG tag elements.
44
## All HTML tags
55

6+
library(rvest)
7+
library(dplyr)
8+
9+
# Note: Mozilla seems to have a more up-to-date set of what is possible / not obsolete compared to W3Schools
10+
base_url <- "https://developer.mozilla.org/en-US/docs/Web"
11+
12+
html_tag_dfs <- read_html(file.path(base_url, "HTML", "Element")) %>%
13+
html_table()
14+
15+
# The last table is obsolete/deprecated elements
16+
n_dfs <- length(html_tag_dfs)
17+
18+
html_tags_df <- html_tag_dfs[-n_dfs] %>%
19+
bind_rows() %>%
20+
# h1-h6 all appear in one comma-separated row
21+
mutate(name = strsplit(Element, ", ")) %>%
22+
tidyr::unnest(name) %>%
23+
select(Element = name, Description) %>%
24+
transmute(
25+
name = sub("^<", "", sub(">$", "", Element)),
26+
desc = paste0(
27+
Description, "\n\n ",
28+
"Learn more at ",
29+
file.path(base_url, "HTML", "Element", name)
30+
)
31+
)
32+
33+
svg <- read_html(file.path(base_url, "SVG", "Element"))
34+
35+
# Due to a lack of structure on the SVG page,
36+
# this seems to be the best way to target just
37+
# the hyperlinks under the "SVG elements A to Z" section
38+
svg_tags <- lapply(letters, function(x) {
39+
html_elements(svg, sprintf("h3[id=%s] + div > ul > li > a", x)) %>%
40+
html_attr("href") %>%
41+
basename()
42+
})
43+
44+
# TODO: eventually it might be nice to also scrape
45+
# the descriptions by following the url
46+
svg_tags_df <- tibble(
47+
name = unlist(svg_tags),
48+
desc = sprintf(
49+
"Creates the <%s> SVG element. Learn more at %s",
50+
name, file.path(base_url, "SVG", "Element", name)
51+
)
52+
)
653

7-
library(magrittr)
8-
9-
10-
get_tags <- function(url, css) {
11-
url %>%
12-
httr::GET() %>%
13-
httr::content() %>%
14-
rvest::html_nodes(css) %>%
15-
rvest::html_text() %>%
16-
sub("^<", "", .) %>%
17-
sub(">$", "", .) %>%
18-
sort() %>%
19-
unique() %>%
20-
print()
21-
}
22-
23-
## W3 Schools
24-
## Mozilla seemed to have a more up to date set of what is possible / not obsolete
25-
# w3html_tags <- get_tags("https://www.w3schools.com/tags/default.asp", "#htmltags tr td:first-child a:not(.notsupported)")
26-
## Had extra tags not seen in other places `altGlyph`
27-
# w3svg_tags <- get_tags("https://www.w3schools.com/graphics/svg_reference.asp", "#main td:first-child")
28-
29-
## W3 Standard
30-
# # The original spec websites made it very hard to determine what was obsolete / shouldn't be used and what was to be used
31-
# html_tags <- get_tags("https://www.w3.org/TR/2018/WD-html53-20181018/single-page.html", "dfn[data-dfn-type='element']")
32-
# svg_tags <- get_tags("https://svgwg.org/svg2-draft/single-page.html", "dfn[data-dfn-type='element']")
3354

55+
# Save a JSON version so other languages can read them in easily
56+
cat(
57+
jsonlite::toJSON(html_tags_df),
58+
file = "scripts/html_tags.json"
59+
)
3460

35-
## Mozilla
36-
# do not include the last section of obsolete tags
37-
html_tags <- get_tags("https://developer.mozilla.org/en-US/docs/Web/HTML/Element", "article table:not(:last-child) td:first-child code")
38-
# html_tags_obsolete <- get_tags("https://developer.mozilla.org/en-US/docs/Web/HTML/Element", "#content table:last-child td:first-child a")
61+
cat(
62+
jsonlite::toJSON(svg_tags_df),
63+
file = "scripts/svg_tags.json"
64+
)
3965

40-
# do not include tags that do not contain documentation articles
41-
# Only pull from the index, as elements not in the index are considered obsolete. (ex: altGlyph or font-face)
42-
svg_tags <- get_tags("https://developer.mozilla.org/en-US/docs/Web/SVG/Element", "article .index a:not([rel='nofollow']) code")
66+
html_tags <- html_tags_df$name
67+
svg_tags <- svg_tags_df$name
4368

4469

4570
# Both SVG2 and HTML5
4671
svg_tags[svg_tags %in% html_tags]
72+
#> [1] "a" "script" "style" "svg" "title"
73+
74+
75+
new_tags <- c(svg_tags, html_tags) %>%
76+
unique() %>%
77+
sort()
4778

4879
# Call using callr::r to avoid any devtools loaded htmltools::tags namespace issues
4980
cran_tags <- callr::r(
@@ -54,30 +85,43 @@ cran_tags <- callr::r(
5485
show = TRUE
5586
)
5687

57-
new_tags <- c(svg_tags, html_tags) %>% unique() %>% sort()
58-
5988
# tags which are not HTML5 / SVG2 supported
6089
setdiff(cran_tags, new_tags)
61-
#> "command" "eventsource" "keygen"
90+
#> [1] "color-profile" "command" "eventsource" "hgroup"
91+
#> [5] "keygen" "rb" "rtc" "solidcolor"
6292

6393

6494
# New HTML5 tags
6595
setdiff(html_tags, cran_tags)
66-
#> "rb" "rtc" "slot"
96+
#> "portal" "math"
97+
6798
# New SVG2 tags
6899
setdiff(svg_tags, cran_tags)
69-
### ...basically all svg tags
100+
#> character(0)
70101

71102
# combine old and new tags so that old tags are not lost
72-
save_tags <- c(new_tags, cran_tags) %>% unique() %>% sort()
73-
74-
# Save a JSON version so other languages can read them in easily
75-
cat(jsonlite::toJSON(save_tags), file = "scripts/known_tags.json")
103+
save_tags <- c(new_tags, cran_tags) %>%
104+
unique() %>%
105+
sort()
76106

77107
save_line <- paste0(
78-
format(paste0(" \"", save_tags, "\"", ifelse(seq_along(save_tags) == length(save_tags), "", ",")), justify = "left"), "#",
79-
ifelse(save_tags %in% html_tags, " html", " "),
80-
ifelse(save_tags %in% svg_tags, " svg", "")
108+
format(
109+
paste0(
110+
" \"", save_tags, "\"",
111+
ifelse(
112+
seq_along(save_tags) == length(save_tags),
113+
"", ","
114+
)
115+
),
116+
justify = "left"
117+
),
118+
"#",
119+
case_when(
120+
save_tags %in% html_tags & save_tags %in% svg_tags ~ " html svg",
121+
save_tags %in% html_tags ~ " html",
122+
save_tags %in% svg_tags ~ " svg",
123+
TRUE ~ " deprecated"
124+
)
81125
) %>%
82126
sub("\\s+$", "", .)
83127
cat(

scripts/html_tags.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

scripts/known_tags.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

scripts/svg_tags.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"name":"a","desc":"Creates the <a> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/a"},{"name":"animate","desc":"Creates the <animate> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animate"},{"name":"animateMotion","desc":"Creates the <animateMotion> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animateMotion"},{"name":"animateTransform","desc":"Creates the <animateTransform> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animateTransform"},{"name":"circle","desc":"Creates the <circle> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/circle"},{"name":"clipPath","desc":"Creates the <clipPath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/clipPath"},{"name":"defs","desc":"Creates the <defs> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/defs"},{"name":"desc","desc":"Creates the <desc> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/desc"},{"name":"discard","desc":"Creates the <discard> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/discard"},{"name":"ellipse","desc":"Creates the <ellipse> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/ellipse"},{"name":"feBlend","desc":"Creates the <feBlend> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feBlend"},{"name":"feColorMatrix","desc":"Creates the <feColorMatrix> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feColorMatrix"},{"name":"feComponentTransfer","desc":"Creates the <feComponentTransfer> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feComponentTransfer"},{"name":"feComposite","desc":"Creates the <feComposite> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feComposite"},{"name":"feConvolveMatrix","desc":"Creates the <feConvolveMatrix> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feConvolveMatrix"},{"name":"feDiffuseLighting","desc":"Creates the <feDiffuseLighting> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDiffuseLighting"},{"name":"feDisplacementMap","desc":"Creates the <feDisplacementMap> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDisplacementMap"},{"name":"feDistantLight","desc":"Creates the <feDistantLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDistantLight"},{"name":"feDropShadow","desc":"Creates the <feDropShadow> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDropShadow"},{"name":"feFlood","desc":"Creates the <feFlood> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFlood"},{"name":"feFuncA","desc":"Creates the <feFuncA> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncA"},{"name":"feFuncB","desc":"Creates the <feFuncB> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncB"},{"name":"feFuncG","desc":"Creates the <feFuncG> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncG"},{"name":"feFuncR","desc":"Creates the <feFuncR> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncR"},{"name":"feGaussianBlur","desc":"Creates the <feGaussianBlur> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feGaussianBlur"},{"name":"feImage","desc":"Creates the <feImage> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feImage"},{"name":"feMerge","desc":"Creates the <feMerge> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMerge"},{"name":"feMergeNode","desc":"Creates the <feMergeNode> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMergeNode"},{"name":"feMorphology","desc":"Creates the <feMorphology> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMorphology"},{"name":"feOffset","desc":"Creates the <feOffset> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feOffset"},{"name":"fePointLight","desc":"Creates the <fePointLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/fePointLight"},{"name":"feSpecularLighting","desc":"Creates the <feSpecularLighting> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feSpecularLighting"},{"name":"feSpotLight","desc":"Creates the <feSpotLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feSpotLight"},{"name":"feTile","desc":"Creates the <feTile> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feTile"},{"name":"feTurbulence","desc":"Creates the <feTurbulence> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feTurbulence"},{"name":"filter","desc":"Creates the <filter> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/filter"},{"name":"foreignObject","desc":"Creates the <foreignObject> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/foreignObject"},{"name":"g","desc":"Creates the <g> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/g"},{"name":"hatch","desc":"Creates the <hatch> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/hatch"},{"name":"hatchpath","desc":"Creates the <hatchpath> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/hatchpath"},{"name":"image","desc":"Creates the <image> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/image"},{"name":"line","desc":"Creates the <line> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/line"},{"name":"linearGradient","desc":"Creates the <linearGradient> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/linearGradient"},{"name":"marker","desc":"Creates the <marker> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/marker"},{"name":"mask","desc":"Creates the <mask> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/mask"},{"name":"metadata","desc":"Creates the <metadata> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/metadata"},{"name":"mpath","desc":"Creates the <mpath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/mpath"},{"name":"path","desc":"Creates the <path> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/path"},{"name":"pattern","desc":"Creates the <pattern> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/pattern"},{"name":"polygon","desc":"Creates the <polygon> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/polygon"},{"name":"polyline","desc":"Creates the <polyline> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/polyline"},{"name":"radialGradient","desc":"Creates the <radialGradient> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/radialGradient"},{"name":"rect","desc":"Creates the <rect> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/rect"},{"name":"script","desc":"Creates the <script> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/script"},{"name":"set","desc":"Creates the <set> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/set"},{"name":"stop","desc":"Creates the <stop> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/stop"},{"name":"style","desc":"Creates the <style> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/style"},{"name":"svg","desc":"Creates the <svg> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/svg"},{"name":"switch","desc":"Creates the <switch> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/switch"},{"name":"symbol","desc":"Creates the <symbol> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/symbol"},{"name":"text","desc":"Creates the <text> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/text"},{"name":"textPath","desc":"Creates the <textPath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/textPath"},{"name":"title","desc":"Creates the <title> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/title"},{"name":"tspan","desc":"Creates the <tspan> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/tspan"},{"name":"use","desc":"Creates the <use> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/use"},{"name":"view","desc":"Creates the <view> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/view"}]

0 commit comments

Comments
 (0)