Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: other-projects/metadata-encoding/py/using-api/blockBlockAnalysis.py@ 38771

Last change on this file since 38771 was 38771, checked in by jc550, 3 months ago
start separating python
File size: 2.6 KB

Line
1	from habanero import Crossref
2	import get_unicode_blocks
3
def get_sample(sampleSize):
    """Request one random sample of works from CrossRef.

    sampleSize is the number of works asked for; anything above 100 is
    reduced to 100 before the query is sent.

    Returns the result of ``Crossref.works`` for the sample query.
    """
    # A single sample query is limited to 100 works, so cap the request.
    cappedSize = min(sampleSize, 100)
    # Supplying an email address puts the requests into the "polite" pool.
    # NOTE(review): this address looks like an e-mail obfuscation placeholder
    # rather than a real mailbox -- confirm the intended contact address.
    client = Crossref(mailto = "[email protected]")
    return client.works(sample=cappedSize)
12
def process_sample(sampleQuery):
    """Return the percentage of Latin characters in each work's first title.

    sampleQuery is a CrossRef-style response: a mapping whose
    ``["message"]["items"]`` entry is a list of work records (dicts).

    A character counts as Latin when its code point is 255 or lower. Works
    that have no "title" key, or whose title list is empty, are skipped and
    contribute nothing to the result.

    Returns a list with one entry per titled work, in input order: 0 when
    the title has no Latin characters (this includes an empty title string),
    otherwise the percentage as a float.
    """
    sampleDataToReturn = []
    for item in sampleQuery["message"]["items"]:
        titles = item.get("title")
        # A missing key and an empty list both mean "no title"; indexing
        # [0] on an empty list would raise IndexError.
        if not titles:
            continue
        title = titles[0]
        numLatinChars = sum(1 for char in title if ord(char) <= 255)
        if numLatinChars == 0:
            # Also covers an empty title string, avoiding a division by zero.
            sampleDataToReturn.append(0)
        else:
            sampleDataToReturn.append(numLatinChars / len(title) * 100)
    return sampleDataToReturn
41
def analyse_processed_sample(processedSamples):
    """Print summary statistics for the Latin percentages of all samples.

    processedSamples is an iterable of samples, each an iterable of
    per-article percentages (numbers from 0 to 100).

    Prints the number of articles analysed, their mean percentage, and how
    many are exactly 100 percent Latin. When there are no articles at all
    the mean is reported as 0 instead of dividing by zero.

    Returns None.
    """
    averagePercentLatin = 0
    numberPureLatin = 0
    totalNumber = 0

    for processedSample in processedSamples:
        for articleData in processedSample:
            totalNumber += 1
            if articleData == 100:
                numberPureLatin += 1
            averagePercentLatin += articleData

    # With zero samples, or samples made only of untitled works, there is
    # nothing to average; leave the mean at 0 rather than dividing by zero.
    if totalNumber > 0:
        averagePercentLatin = averagePercentLatin / totalNumber

    print("Total Number of Articles Analysed: " + str(totalNumber))
    print("Average Percent Latin: " + str(averagePercentLatin))
    print("Number Pure Latin: " + str(numberPureLatin))
58
# Ask how many samples to draw and how large each one should be.
numSamples = int(input("How many samples would you like to do? "))
sampleSize = int(input("How big should each sample be? (MAX 100) "))

# For every sample drawn, the percentage of each title that is comprised of
# Latin characters (one inner list per sample).
sampleLatinData = [
    process_sample(get_sample(sampleSize)) for _ in range(numSamples)
]

# Report the combined statistics across all samples.
analyse_processed_sample(sampleLatinData)
73

Note: See TracBrowser for help on using the repository browser.

Download in other formats: