source: main/trunk/model-sites-dev/twso/PreProcessing/namefinder1.py@ 34227

Last change on this file since 34227 was 34227, checked in by kjdon, 4 years ago

the processing files to help you generate metadata.xml files for the programmes

File size: 4.1 KB
Line 
1import sys
2import os
3
def openFile(filename):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        filename: Path of the file to read.

    Returns:
        A list holding one stripped string per line of the file, in file
        order. Blank lines are kept as empty strings.
    """
    # The context manager closes the handle even if reading raises.
    with open(filename) as programme:
        return [line.strip() for line in programme]
18
def splitName(name):
    """Reorder a "FIRST LAST" name into "LAST, FIRST" form.

    Only the first two whitespace-separated words are used, so names with
    more parts (for example "Ana de Rooy", wanted as "de Rooy, Ana") still
    have to be entered manually by the user.

    Args:
        name: The name as it appears in the programme, e.g. "JESSICA TURNER".

    Returns:
        "TURNER, JESSICA" for a name of two or more words, the single word
        unchanged for a one-word name, and an empty string when the input
        contains no words at all.
    """
    parts = name.split()
    if not parts:
        # Empty or whitespace-only input: nothing to reorder.
        return ""
    if len(parts) >= 2:
        return parts[1] + ", " + parts[0]
    return parts[0]
27
def checkForName(doc, n):
    """Report whether a name is already present in the document list.

    Used to avoid adding duplicate occurrences of a name.

    Args:
        doc: List of entries collected so far (may be empty or None).
        n: The formatted name to look for.

    Returns:
        True if an entry equal to ``n`` exists anywhere in ``doc``,
        otherwise False.
    """
    if not doc:
        return False
    # Membership test scans the whole list, not just the first entry.
    return n in doc
41
def appendName(doc, name, role):
    """Append a name to the document list in the format required by its role.

    - Soloists are written as "soloist <name>" followed by "<name>" on its
      own entry, because a soloist is also considered a player.
    - Conductors are written only as "conductor <name>", because a
      conductor is not a player.
    - Every other role is written as the plain "<name>".

    Args:
        doc: List of output entries; modified in place.
        name: The formatted name, e.g. "Turner, Jessica".
        role: The role under which the name was found.
    """
    if role in ("soloist", "soloists"):
        doc.append("soloist " + name)
        doc.append(name)
    elif role in ("conductor", "conductors"):
        doc.append("conductor " + name)
    else:
        doc.append(name)
55
def addPlayer(doc, iterator, role):
    """Interactively collect the names that follow a role heading.

    Called after a role has been found. Lines are pulled from ``iterator``
    one at a time until a blank line is reached, the input runs out, or the
    user answers "N". Each candidate is reformatted with splitName and,
    unless it is already in ``doc``, the user is asked to confirm it. This
    guards against a role line being followed by something that is not a
    name.

    Prompt answers (case-insensitive):
        Y      - add the name to the document list.
        E      - type the correctly formatted name by hand, for names such
                 as "van der Goes, Natalie" that splitName cannot handle.
        N      - stop looking for names under this role.
        other  - skip this line and move on to the next one.

    Args:
        doc: List of output entries; modified in place.
        iterator: Iterator over the remaining programme lines, shared with
            the caller so that consumed lines are not revisited.
        role: The role the names belong to.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # Supplying a default avoids an uncaught StopIteration when the role
    # heading was the last line of the input.
    name = next(iterator, None)

    while name:
        print("NAME FOUND: " + name)
        name = splitName(name)
        print("NAME TO BE ADDED: " + name)
        if checkForName(doc, name):
            print("The name " + name + " is already in the document.")
        else:
            var = ask("Is this a name? Y|N|E|Hit any other key to continue...\n")
            var = var.upper()
            if var == "Y":
                appendName(doc, name, role)
            elif var == "E":
                var = ask("Enter name: ")
                appendName(doc, var, role)
            elif var == "N":
                # Stop right away so the following line is left for the
                # caller instead of being read and thrown away.
                break

        name = next(iterator, None)
93
def findRole(roles, lines):
    """Scan the programme lines for role headings and collect the names under them.

    Each non-blank line is echoed and compared, case-insensitively, with the
    known roles. When a line matches, addPlayer is called to consume the
    following lines as names for that role.

    Args:
        roles: Collection of role names; entries are compared against the
            lower-cased line, so they are expected to be lower case.
        lines: List of programme lines (already stripped).

    Returns:
        The list of formatted name entries gathered for the whole programme.
    """
    nameDoc_lines = []
    # Headings that need their leading ordinal dropped before they can be
    # matched against a role, e.g. "1st violin" -> "violin".
    specialcases = ('1st violin', '2nd violin', '1st violins', '2nd violins')

    # One shared iterator, so lines consumed by addPlayer are not seen again.
    iterator = iter(lines)
    for line in iterator:
        if not line:
            # Skip every blank line, including runs of consecutive ones.
            continue

        print(line)

        if line.lower() in specialcases:
            line = line.split()[1]

        lowered = line.lower()
        for role in roles:
            if role == lowered:
                print("LINE: " + line)
                print("MATCH FOUND " + role.upper())
                addPlayer(nameDoc_lines, iterator, role)
                break

    return nameDoc_lines
125
def writeToFile(orig_file, nameDoc):
    """Write the collected names to "<original name>_name_list.txt".

    The output name is built from the part of the original file's base name
    before its first dot, and the file is created next to the original.
    Any existing file of that name is replaced.

    Args:
        orig_file: Path of the programme file the names were taken from.
        nameDoc: List of name entries to write, one per line.
    """
    # Split off the directory first so that dots in directory names
    # (e.g. "./prog.txt") do not cut the path short.
    directory, base_name = os.path.split(orig_file)
    new_file_name = os.path.join(
        directory, base_name.split(".")[0] + "_name_list.txt")

    # "w" truncates an existing file so no stale entries are left behind,
    # and the context manager makes sure the data is flushed and closed.
    with open(new_file_name, "w") as new_file:
        for name in nameDoc:
            new_file.write(name + "\n")

    print("Finished writing names to " + new_file_name)
141
142
def main():
    """Run the name finder on the programme file named on the command line.

    Reads the known roles from the file "roles" in the current directory,
    reads the programme file given as the first command-line argument,
    collects the names found under each role and writes them to the
    matching "_name_list.txt" file.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.stderr.write("Usage: " + sys.argv[0] + " <programme file>\n")
        sys.exit(1)

    roles = openFile("roles")
    lines = openFile(sys.argv[1])

    nameDoc = findRole(roles, lines)

    writeToFile(sys.argv[1], nameDoc)


# Only run when executed as a script, so the module can be imported safely.
if __name__ == "__main__":
    main()
153
154
Note: See TracBrowser for help on using the repository browser.