1 | import re, sys, argparse
|
---|
2 |
|
---|
3 | # Functions to make title consistent between platforms
|
---|
# Names of the face-markup tags. clear_face_markup removes the exact
# "<name>" and "</name>" forms of each entry.
faceMarkupTags = [
    "b",
    "i",
    "u",
    "ovl",
    "sup",
    "sub",
    "scp",
    "tt",
]

# Substring that clear_math_ml_tags looks for inside a matched tag.
mathMLTag = "mml"

# Matches "<", an optional "/", one or more non-space characters (lazily),
# then ">". Written as a raw string because "\/" is not a valid Python string
# escape and triggers an invalid-escape warning in a normal literal; the
# pattern text itself is unchanged.
# NOTE(review): a tag containing a space (e.g. one carrying attributes) is not
# matched by this pattern - confirm that is intended.
tagRegex = r"[<]\/?[^ ]+?[>]"
|
---|
18 |
|
---|
def clear_face_markup(title):
    """Strip every face-markup tag named in ``faceMarkupTags`` from *title*.

    For each tag name, the exact opening (``<name>``) and closing
    (``</name>``) forms are removed by plain substring replacement; the text
    between the tags is kept. The stripped title is printed to stdout and
    then returned.
    """
    stripped = title
    for name in faceMarkupTags:
        # The opening form is removed before the closing form.
        for opener in ("<", "</"):
            stripped = stripped.replace(opener + name + ">", "")

    print("regex done: " + stripped)
    return stripped
|
---|
30 |
|
---|
def clear_math_ml_tags(title):
    """Remove tags that contain ``mathMLTag`` from *title*.

    Candidate tags are every match of ``tagRegex`` in the unmodified title.
    Each candidate containing the ``mathMLTag`` substring has all of its
    occurrences replaced with the empty string; other candidates are left
    alone. Returns the resulting string.
    """
    cleaned = title
    for tag in re.findall(tagRegex, title):
        if mathMLTag not in tag:
            # Not a MathML tag; leave it in place.
            continue
        cleaned = cleaned.replace(tag, "")
    return cleaned
|
---|
39 |
|
---|
def clear_tags(title):
    """Return *title* with face-markup tags removed first, then MathML tags."""
    return clear_math_ml_tags(clear_face_markup(title))
|
---|
43 |
|
---|
def add_subtitles():
    """Return a fixed placeholder string; takes no input."""
    placeholder = "Title with added subtitles"
    return placeholder
|
---|
46 |
|
---|
47 | # commandline interface for checking things
|
---|
def main():
    """Command-line entry point: clean the given title and print it.

    Parses a required positional ``title`` and an optional ``-s/--subtitle``
    argument, prints the parsed argument namespace, then prints the title
    after it has been passed through ``clear_tags``. The subtitle value is
    accepted but not used here.
    """
    cli = argparse.ArgumentParser(
        description="sanitises title input for consistent output",
        prog="sanitise",
    )
    cli.add_argument("title", help="Title of the article")
    cli.add_argument(
        "-s",
        "--subtitle",
        help="Subtitle (if one provided)",
    )
    parsed = cli.parse_args()

    print(parsed)

    cleaned_title = clear_tags(parsed.title)
    print(cleaned_title)
|
---|
62 |
|
---|
63 |
|
---|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
---|