Changeset 38472
- Timestamp:
- 2023-12-04T15:23:58+13:00 (6 months ago)
- Location:
- other-projects/metadata-encoding/py
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/metadata-encoding/py/comparisonTest/sanitise.py
--- r38471
+++ r38472
@@ -3,11 +3,11 @@
 # Functions to make title consistent between platforms
 faceMarkupTags = [
-    "b"
-    "i"
-    "u"
-    "ovl"
-    "sup"
-    "sub"
-    "scp"
+    "b",
+    "i",
+    "u",
+    "ovl",
+    "sup",
+    "sub",
+    "scp",
     "tt"
 ]
@@ -15,14 +15,24 @@
 mathMLTag = "mml"
 
-tagRegex = "[<]\/?[^ ]+ [>]"
+tagRegex = "[<]\/?[^ ]+?[>]"
 
 def clear_tags(title):
-    # List of all HITS :DD
+    # Get rid of all facemarkup tags
+    regexTitle = title
     for tagString in faceMarkupTags:
         newString = "<" + tagString + ">"
-        title = re.sub(newString, "", title)
+        regexTitle = regexTitle.replace(newString, "")
+        newString = "</" + tagString + ">"
+        regexTitle = regexTitle.replace(newString, "")
 
-    print(title)
-    return "Title to return"
+    print("regex done: " + regexTitle)
+
+    # find all regex matches and remove mathml tags
+    regexMatches = re.findall(tagRegex, regexTitle)
+    print(regexMatches)
+    for match in regexMatches:
+        if match.find(mathMLTag) != -1:
+            regexTitle = regexTitle.replace(match, "")
+    return regexTitle
 
 def add_subtitles():
Note:
See TracChangeset
for help on using the changeset viewer.