-
Notifications
You must be signed in to change notification settings - Fork 70
Requirement Parsers
Ching Chang edited this page Jul 14, 2022
·
15 revisions
This documentation specifies the string patterns each parser in /app/WebParsing/ReqParser.hs accepts in Extended Backus-Naur Form (EBNF).
The main notations used are
| Notation | Meaning |
|---|---|
| `a \| b` | a or b |
| `a, b` | a followed by b |
| `a - b` | a but not b |
| `n * a` | n number of a's |
| `"..."` | string literal (case insensitive[1]) |
| `[...]` | optional |
| `{...}` | 1 or more |
| `(...)` | grouping |
| `(*...*)` | comment |
The concatenation operator `,` has a higher precedence than the alternation operator `|`.
This means a, b | c, d translates to "(a followed by b) or (c followed by d)"
not "a followed by (b or c) followed by d".
In addition, some logic cannot be represented in EBNF
and is therefore described in plain English, expressed as ? ... ?.
| Symbol | Definition |
|---|---|
| digit | "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \| "8" \| "9" |
| letter | ? all upper and lower case English letters ? |
| character | ? all ASCII characters ? |
Note: The parsers do not necessarily return the strings that they consume.
For example, gradeParser may consume "(73%), and" but would only return 73.
requirement = ? and (category | "(", category, ")") ?;
category = fces | course | cgpa | programOr | rawText;
(* fces *)
fces = [completionPrefix], float, [department], fceSeparator, [includingSeparator, ? and category ?], [fromSeparator], [anyModifier], modifiers;
modifiers = modifier, [{[fromSeparator], modifiersNoRaw}];
modifier = modifiersNoRaw | rawModifier;
modifiersNoRaw = ? and course ? | level | department;
rawModifier = {character - (andSeparator | orSeparator)};
level = 3 * digit, ["+" | "-"], "level", [courseLiteral], ["or higher"];
department = {character - (courseLiteral | fceSeparator | orSeparator | andSeparator | fromSeparator)}, [courseLiteral];
(* course *)
course = courseOptionalCutoff | cutoffBefore | cutoffAfter;
courseOptionalCutoff = courseID, ["(", (percentGrade | letterGrade | info), ")"];
cutoffBefore = ["an" | "a " ? not followed by "in" ?], [("minimum grade" | "minimum mark" | "minimum" | "grade" | "final grade" | "at least"), ["of"]], grade, [{character - courseID}], courseID;
cutoffAfter = courseID, "(", [{character - ("(" | ")" | orSeparator | andSeparator | grade ? not followed by digit or letter ?)}], [{character - ")"}], ")";
courseID = (utsgCourseCode | utscCourseCode), ("H" | "Y"), digit;
utsgCourseCode = 3 * letter, 3 * digit;
utscCourseCode = 4 * letter, 2 * digit;
grade = "(", (percentGrade | letterGrade), ")" | percentGrade | letterGrade;
percentGrade = 2 * digit ? not followed by another digit ?, ["%"];
letterGrade = ("A" | "B" | "C" | "D" | "E" | "F") ? not followed by another letter ?, ["+" | "-"];
info = {character - ")"}, ")";
(* cgpa *)
cgpa = [cgpaPrefix], float, ["cGPA"], {character - (andSeparator | orSeparator)};
(* program *)
programOr = programPrefix, programGroup, [{progOrSeparator, programGroup}];
programGroup = program, [{progGroupSeparator, program}], degreeType, [{orSeparator, degreeType}], [degreeType | programSuffix];
program = [{character - (degreeType | programSuffix | progGroupSeparator | progOrSeparator | "." | ";")}];
(* rawText *)
rawText = {character - (";" | "\n")} | "";
(* Other *)
fceSeparator = "FCEs." | "FCEs" | "FCE." | "FCE" | "credits" | "credit" | "full-course equivalents" | "additional credits" | "additional credit";
oneOfSeparator = ("one of either" | "one of the following" | "at least one of" | "one of" | "1 of" | "at least 1 of"), [":"];
orSeparator = "/" | "or" | ", or";
andSeparator = ", and" | ", an additional" | ", additional" | "," | "and" | "; and" | "." | ";" | "&" | "+" | "plus";
fromSeparator = "of any of the following:" | "of" | "from the following:" | "from the" | "from:" | "from" | "at the" | "at" | "in";
progGroupSeparator = "," | "or a" | "or";
progOrSeparator = "or in a";
includingSeparator = "including" | ", including";
completionPrefix = "Completion of at least" | "Completion of a minimum of" | "Completion of" | "have completed" | "At least one additional" | "At least one" | "At least" | "Any" | "a";
programPrefix = "admission to" | "enrolment in the" | "enrolment in an" | "enrolment in a" | "enrolment in";
cgpaPrefix = "and will normally have a CGPA of at least" | "with a CGPA of at least" | "with a minimum cGPA of" | "and a minimum cGPA of" | "and minimum cGPA of" | "a CGPA of at least" | "a minimum cGPA of" | "minimum cGPA of" | "with" | "cGPA";
programSuffix = "program of study" | "program";
degreeType = "major" | "minor" | "specialist";
courseLiteral = "course" | "courses";
anyModifier = "any", ("field" | "subject");
float = {digit}, [".", {digit}];
(* Helpers (see footnote [2]) *)
oneOf p = oneOfSeparator, p, [{(orSeparator | andSeparator), p}];
or p = p, [{orSeparator, p}];
and p = p, [{andSeparator, (? oneOf p ? | ? or p ?)}];

[1] String literals are not case insensitive in EBNF by default, but for the simplicity of the documentation, assume they are :)

[2] These helpers are context-sensitive and therefore not EBNF. However, we use them here to make the grammar more concise without the repetitions.