Changeset 38472


Ignore:
Timestamp:
2023-12-04T15:23:58+13:00 (6 months ago)
Author:
jc550
Message:

Add a script to test subtitle joining, and add tag removal to titles.

Location:
other-projects/metadata-encoding/py
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/metadata-encoding/py/comparisonTest/sanitise.py

    r38471 r38472  
    33# Functions to make title consistent between platforms
    44faceMarkupTags = [
    5     "b"
    6     "i"
    7     "u"
    8     "ovl"
    9     "sup"
    10     "sub"
    11     "scp"
     5    "b",
     6    "i",
     7    "u",
     8    "ovl",
     9    "sup",
     10    "sub",
     11    "scp",
    1212    "tt"
    1313]
     
    1515mathMLTag = "mml"
    1616
    17 tagRegex = "[<]\/?[^ ]+[>]"
     17tagRegex = "[<]\/?[^ ]+?[>]"
    1818
    1919def clear_tags(title):
    20     # List of all HITS :DD
     20    # Get rid of all facemarkup tags
     21    regexTitle = title
    2122    for tagString in faceMarkupTags:
    2223        newString = "<" + tagString + ">"
    23         title = re.sub(newString, "", title)
     24        regexTitle = regexTitle.replace(newString, "")
     25        newString = "</" + tagString + ">"
     26        regexTitle = regexTitle.replace(newString, "")
    2427
    25     print(title)
    26     return "Title to return"
     28    print("regex done: " + regexTitle)
     29
     30    # find all regex matches and remove mathml tags
     31    regexMatches = re.findall(tagRegex, regexTitle)
     32    print(regexMatches)
     33    for match in regexMatches:
     34        if match.find(mathMLTag) != -1:
     35            regexTitle = regexTitle.replace(match, "")
     36    return regexTitle
    2737
    2838def add_subtitles():
Note: See TracChangeset for help on using the changeset viewer.