Context Navigation

blockBlockAnalysis.py@ 38791

Last change on this file since 38791 was 38791, checked in by jc550, 4 months ago
add comments adding context to functions that require it
File size: 2.7 KB

Line
1	# Function to take a sample of articles from CrossRef and display information about the percentage of "Pure Latin" in the titles
2
3	from habanero import Crossref
4	import get_unicode_blocks
5
def get_sample(sampleSize):
    """Request a random sample of works from CrossRef.

    sampleSize -- how many works to ask for; any value above 100 is
                  reduced to 100 before the query is sent.

    Returns the result of ``Crossref.works(sample=...)`` unchanged.
    """
    # Supplying a mailto address places the requests in CrossRef's "polite" pool
    client = Crossref(mailto = "[email protected]")
    # Sample queries are limited to 100 items, so larger requests are capped
    capped_size = 100 if sampleSize > 100 else sampleSize
    return client.works(sample=capped_size)
14
def process_sample(sampleQuery):
    """Measure how "Latin" each title in a CrossRef sample response is.

    sampleQuery -- mapping with the items to inspect under
                   ``sampleQuery["message"]["items"]``; each item is a
                   mapping that may hold a list of titles under "title".

    Returns a list with one entry per item that has a title: the percentage
    (0-100) of characters in the item's first title whose code point is
    <= 255. Items with a missing or empty "title" are skipped, so the
    result may be shorter than the list of items.
    """
    percentages = []
    for item in sampleQuery["message"]["items"]:
        titles = item.get("title")
        # An absent or empty title list has no first title to inspect;
        # indexing [0] on an empty list would raise IndexError.
        if not titles:
            continue
        title = titles[0]
        # Code points 0-255 are what this script counts as "Latin"
        latin_count = sum(1 for ch in title if ord(ch) <= 255)
        if latin_count == 0:
            # Also covers an empty title string, avoiding a division by zero
            percentages.append(0)
        else:
            percentages.append(latin_count / len(title) * 100)
    return percentages
43
def analyse_processed_sample(processedSamples):
    """Print summary statistics for the per-title Latin percentages.

    processedSamples -- iterable of samples, each an iterable of
                        percentages (one number per analysed title).

    Prints the total number of titles, the mean percentage, and how many
    titles scored exactly 100. Returns None. When there are no percentages
    at all, the mean is reported as 0.0 instead of dividing by zero.
    """
    # Flatten the per-sample lists into one list of percentages
    all_percentages = [value for sample in processedSamples for value in sample]
    total_number = len(all_percentages)
    number_pure_latin = sum(1 for value in all_percentages if value == 100)

    # No data can occur with zero samples or when every item lacked a title
    if total_number:
        average_percent_latin = sum(all_percentages) / total_number
    else:
        average_percent_latin = 0.0

    print("Total Number of Articles Analysed: " + str(total_number))
    print("Average Percent Latin: " + str(average_percent_latin))
    print("Number Pure Latin: " + str(number_pure_latin))
60
# Ask the user how many samples to draw and how large each one should be
print("How many samples would you like to do? ", end="")
numSamples = int(input())
print("How big should each sample be? (MAX 100) ", end="")
sampleSize = int(input())

# One list of per-title Latin percentages for each sample drawn:
# every sample is fetched from CrossRef and processed straight away
sampleLatinData = [
    process_sample(get_sample(sampleSize)) for _ in range(numSamples)
]

# Summarise the percentages gathered across all samples
analyse_processed_sample(sampleLatinData)
75

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: other-projects/metadata-encoding/py/using-api/blockBlockAnalysis.py@ 38791

Download in other formats: