# Compare the titles held by OpenAlex and xref, checking whether the xref subtitle matters.
# Would probably work a lot better if OpenAlex were in a database: the major bottleneck is API wait time.
|
---|
3 |
|
---|
4 | import sys, pymongo, json
|
---|
5 | from alive_progress import alive_bar
|
---|
6 | sys.path.insert(1, 'comparisonTest/')
|
---|
7 | from compare import getDOIOpenAlex, getTitleFromOpenAlexObject
|
---|
8 |
|
---|
# get the next document from mongo
# get its title, subtitle, and DOI
# get the title from openalex
# compare the xref title + subtitle with the openalex title
# count depending on whether:
#  - titles are directly equivalent, without subtitles
#  - titles are not equivalent, but the subtitle makes them equivalent
#  - titles are not equivalent, even with subtitles
# print to the log file
|
---|
18 |
|
---|
# Shared log handle: printMessage() writes JSON records to it and main() closes it.
writeFile = open(file="titleLog.txt", mode="w", encoding="utf-8")
|
---|
20 |
|
---|
def printMessage(doi, xRefTitle, oAlexTitle, equivalenceMessage):
    """Report the outcome of one title comparison.

    When equivalenceMessage is exactly "Titles Equivalent", a single short
    line is printed and nothing is written to the log file. For any other
    message a delimited report is printed and the same four values are
    appended to the module-level ``writeFile`` handle as an indented JSON
    object.

    All four arguments are concatenated onto string labels, so each must be
    a ``str``.
    """
    if equivalenceMessage == "Titles Equivalent":
        print(doi + " : Equivalent")
        return

    divider = "========================"
    labelledValues = (
        ("DOI: ", doi),
        ("XREF TITLE: ", xRefTitle),
        ("OA TITLE: ", oAlexTitle),
        ("EQUIVALENT: ", equivalenceMessage),
    )

    print(divider)
    for label, value in labelledValues:
        print(label + value)
    print(divider)

    logRecord = {
        "DOI": doi,
        "xrefTitle": xRefTitle,
        "openAlexTitle": oAlexTitle,
        "msg": equivalenceMessage,
    }
    writeFile.write(json.dumps(logRecord, indent=3) + "\n")
    # The handle may never be closed by whoever runs this program, so flush
    # after every record to make sure the data actually reaches the file.
    writeFile.flush()
|
---|
38 |
|
---|
def _asText(value):
    """Return value as a single string; list/tuple values are space-joined.

    NOTE(review): the Crossref REST API delivers "title" and "subtitle" as
    lists of strings. This assumes the collection may hold such raw records;
    confirm against the actual schema. Plain strings and None pass through
    unchanged.
    """
    if isinstance(value, (list, tuple)):
        return " ".join(str(part) for part in value)
    return value


def _leftoverFragments(text, parts):
    """Return the non-empty pieces of text left after cutting out each part."""
    fragments = [text]
    # Cut the longest part first so a shorter part that also occurs inside a
    # longer one does not break the longer match up.
    for part in sorted(parts, key=len, reverse=True):
        if not part:
            continue  # str.split rejects an empty separator
        pieces = []
        for fragment in fragments:
            # str.split takes one separator string, so parts are removed
            # one at a time rather than in a single call.
            pieces.extend(fragment.split(part))
        fragments = pieces
    return [fragment for fragment in fragments if fragment]


def _classifyTitles(xRefTitle, xRefSubtitle, oAlexTitle):
    """Decide how the xref title/subtitle relates to the OpenAlex title.

    Returns a (title to report, equivalence message) pair. The reported
    title is the bare xref title for the first three cases and
    "title subtitle" once the subtitle is known to occur in the OpenAlex
    title.
    """
    # case 1: titles are the same
    if xRefTitle == oAlexTitle:
        return xRefTitle, "Titles Equivalent"

    # case 2: titles differ and there is no subtitle for this DOI
    if not xRefSubtitle:
        return xRefTitle, "Titles Not Equivalent"

    # case 3: titles differ and the OpenAlex title does not contain the subtitle
    if xRefSubtitle not in oAlexTitle:
        return xRefTitle, "Titles Not Equivalent"

    # case 4: the OpenAlex title contains the subtitle. If it also contains
    # the title and at most one run of text (e.g. a ": " separator) is left
    # over, treat the two as roughly the same title.
    combinedTitle = xRefTitle + " " + xRefSubtitle
    leftover = _leftoverFragments(oAlexTitle, (xRefTitle, xRefSubtitle))
    if xRefTitle in oAlexTitle and len(leftover) <= 1:
        return combinedTitle, "Title + Subtitle Somewhat Equivalent to Title"
    return combinedTitle, "Title + Subtitle Similar to Title but Not Equivalent"


def main():
    """Compare every document's xref title with its OpenAlex title.

    Reads all documents from the "test" collection of the "test" database on
    the default MongoDB connection, looks each DOI up through
    getDOIOpenAlex, and reports the comparison through printMessage. A
    failure on one document is printed and does not stop the run. The
    module-level writeFile handle is closed when the run ends, even if it
    ends with an exception.
    """
    # initialise mongo connection
    mongoClient = pymongo.MongoClient()
    mongoDatabase = mongoClient["test"]
    mongoCollection = mongoDatabase["test"]

    try:
        # get cursor for searching database
        documents = mongoCollection.find()
        count = mongoCollection.count_documents({})

        # for each document in the database
        with alive_bar(count) as bar:
            for document in documents:
                try:
                    # get the xref information
                    xRefTitle = _asText(document["title"])
                    xRefSubtitle = _asText(document.get("subtitle"))
                    xRefDOI = document["DOI"]

                    # now get the openalex info
                    oAlexObject = getDOIOpenAlex(xRefDOI)
                    oAlexTitle = getTitleFromOpenAlexObject(oAlexObject)

                    reportedTitle, message = _classifyTitles(
                        xRefTitle, xRefSubtitle, oAlexTitle
                    )
                    printMessage(xRefDOI, reportedTitle, oAlexTitle, message)
                except Exception as error:
                    # Deliberately broad: one bad record (most likely a DOI
                    # that openalex does not have) must not abort the scan.
                    print("Error: " + str(error))
                finally:
                    bar()
    finally:
        # close the file handler
        writeFile.close()
|
---|
99 |
|
---|
# Run the comparison only when this file is executed as a script, so that
# importing it does not start the MongoDB scan.
if __name__ == "__main__":
    main()
---|