source: other-projects/metadata-encoding/py/using-api/testSubtitles.py@ 38791

Last change on this file since 38791 was 38791, checked in by jc550, 4 months ago

add comments adding context to functions that require it

File size: 1.5 KB
Line 
1# test to retrieve subtitles
2
3from habanero import Crossref
4from diophila import OpenAlex
5import sys, argparse, json
6
# Accumulated results: a list of {"dois": [...]} segments that
# retrieveDOIsWithSubtitles() appends to and main() serialises to JSON.
doiToCheck = list()

# Pattern for dates shaped like YYYY-M-D (four digits, then two dash-separated
# digit groups). Not referenced in the visible part of this file.
dateRegex = "[0-9]{4}-{1}[0-9]+-{1}[0-9]+"

# Shared API clients, each created with a contact e-mail address.
# NOTE(review): the address looks like a redacted placeholder -- confirm the
# real contact address before running against the live services.
cr = Crossref(mailto="[email protected]")
oa = OpenAlex("[email protected]")
12
def retrieveDOIsWithSubtitles(num):
    """Collect DOIs of randomly sampled Crossref works that have a subtitle.

    Repeatedly requests a sample of 100 works (journal articles, proceedings
    articles and book chapters) through the module-level ``cr`` client,
    prints every sampled work that has a subtitle, and appends one
    ``{"dois": [...]}`` segment per request to the module-level
    ``doiToCheck`` list.

    Args:
        num: Minimum number of distinct DOIs to collect. Every sample is
            processed in full, so the final count may exceed ``num``.

    Returns:
        None. Results are accumulated in ``doiToCheck``.

    Note:
        The loop only ends once ``num`` new DOIs have been seen; it keeps
        requesting samples for as long as that takes.
    """
    # doiToCheck stores {"dois": [...]} segments rather than bare DOI strings,
    # so a membership test against it can never match a DOI. Track seen DOIs
    # in a flat set instead, seeded from segments collected by earlier calls.
    seenDois = {doi for segment in doiToCheck for doi in segment["dois"]}
    numCounted = 0
    while numCounted < num:
        datasegment = []
        query = cr.works(
            filter={'type': ['journal-article', 'proceedings-article', 'book-chapter']},
            progress_bar=True,
            sample=100,
        )

        for item in query["message"]["items"]:
            subtitles = item.get("subtitle")
            if not subtitles:
                # Key absent or list empty: there is no subtitle to report.
                continue

            doi = item["DOI"]
            # Guard against a record with a missing or empty title list
            # instead of failing the whole run on one item.
            titles = item.get("title") or [""]
            print("Title: " + titles[0] + " | Subtitle: " + subtitles[0] + " | DOI: https://doi.org/" + doi)

            if doi not in seenDois:
                seenDois.add(doi)
                numCounted = numCounted + 1
                datasegment.append(doi)

        # One segment per request, even when it contributed no new DOIs.
        doiToCheck.append({"dois": datasegment})
30
def main(argv):
    """Parse the command line, collect DOIs and write them to a JSON file.

    The output file receives a single object of the form
    ``{"dataArray": [{"dois": [...]}, ...]}``.

    Args:
        argv: Full argument vector in ``sys.argv`` style, i.e. including the
            program name as its first element. When empty or ``None``,
            argparse falls back to reading ``sys.argv`` itself.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("numberDOIToGet", type=int, help="the number of DOIs that is wanted")
    parser.add_argument("outputFilePath", help="the output file path")
    # Use the argv that was passed in (minus the program name) rather than
    # silently ignoring the parameter.
    args = parser.parse_args(argv[1:] if argv else None)

    # The file is still opened before fetching, so an unwritable path fails
    # fast; the context manager guarantees it is flushed and closed.
    with open(args.outputFilePath, "w", encoding="utf-8") as outputFile:
        retrieveDOIsWithSubtitles(args.numberDOIToGet)
        json.dump({"dataArray": doiToCheck}, outputFile)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main(sys.argv)
Note: See TracBrowser for help on using the repository browser.