1 | #program to convert xref metadata files into compatible files for mongo database
|
---|
2 |
|
---|
3 | import pymongo, gzip, argparse, json
|
---|
4 |
|
---|
# Value stored as "_id" on every document built by convertFile.
# NOTE(review): nothing in the visible code reassigns this, so all documents
# share _id 0 -- confirm the intended id scheme before inserting into Mongo.
numFile = 0


def convertFile(path):
    """Convert one gzipped xref metadata file into Mongo-ready dictionaries.

    The file is read as gzip-compressed UTF-8 JSON whose top-level object
    holds an "items" list. Each item yields one dictionary containing "_id"
    (the module-level numFile) plus whichever of "DOI", "title" and
    "subtitle" the item provides. For "title" and "subtitle" only the first
    list entry is kept.

    Args:
        path: Path of the gzip-compressed JSON file to read.

    Returns:
        A list with one dictionary per entry of "items", in file order.

    Raises:
        OSError: If the file cannot be opened or is not valid gzip data.
        json.JSONDecodeError: If the decompressed text is not valid JSON.
        KeyError: If the top-level object has no "items" key.
    """
    # Text mode is required here: gzip.open only accepts an encoding in text
    # mode and raises ValueError when one is passed with binary mode "r".
    with gzip.open(path, "rt", encoding="utf-8") as gzippedFile:
        jsonDictionary = json.load(gzippedFile)

    documents = []
    for item in jsonDictionary["items"]:
        dictToDump = {"_id": numFile}
        if "DOI" in item:
            dictToDump["DOI"] = item["DOI"]
        # Both fields are indexed with [0]; skip them when missing or empty
        # so an empty list does not raise IndexError.
        for key in ("title", "subtitle"):
            if item.get(key):
                dictToDump[key] = item[key][0]
        documents.append(dictToDump)
    return documents
|
---|
20 |
|
---|
21 |
|
---|
22 |
|
---|
def convertFileVerbose(path):
    """Print a progress line naming the file being converted.

    Args:
        path: Path of the file to report; interpolated into the message.

    NOTE(review): this only prints the message and does not run a conversion
    itself -- confirm whether it should also delegate to convertFile.
    """
    # str.format fills the {0} placeholder; passing path as a second print()
    # argument would leave the literal braces in the output.
    print("Converting {0}...".format(path))
|
---|
25 |
|
---|
def processDir(verbose, path, count):
    """Pick the per-file converter to use for a directory run.

    Args:
        verbose: Truthy selects convertFileVerbose, otherwise convertFile.
        path: Directory path; not read by the current body.
        count: File limit; not read by the current body.

    The chosen callable is only bound to a local name; nothing is invoked.
    """
    # verbosity decides which converter is selected
    conversionFunction = convertFileVerbose if verbose else convertFile
|
---|
30 |
|
---|
def main(argv=None):
    """Parse the command line and hand directory inputs to processDir.

    Args:
        argv: Optional list of argument strings. The default None makes
            argparse read sys.argv[1:].

    Raises:
        SystemExit: Raised by argparse for invalid arguments or -h, and when
            --database is given without --collection.

    NOTE(review): a filePath that is not a directory is parsed but nothing is
    converted for it -- confirm the intended single-file behaviour.
    """
    # Imported locally so this function brings its own dependency with it.
    import os

    parser = argparse.ArgumentParser(
        prog="xRefToMongo",
        description="convert xRef Metadata to compatible file for Mongo",
    )
    parser.add_argument("filePath", help="path to file or directory")
    parser.add_argument(
        "-c",
        "--count",
        dest="count",
        type=int,
        help="number of file to convert if working with full metadata archive",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        dest="verbose",
        action="store_true",
        help="enable verbose output",
    )
    parser.add_argument(
        "-db",
        "--database",
        dest="database",
        help="export directly to mongodb database (requires collection)",
    )
    # "-c" already belongs to --count; registering it a second time makes
    # argparse raise a conflicting-option error, so use a distinct short flag.
    parser.add_argument(
        "-col",
        "--collection",
        dest="collection",
        help="collection in specified database",
    )

    # count is None when -c/--count is not supplied
    parsedArgs = parser.parse_args(argv)

    # Enforce what the --database help text states.
    if parsedArgs.database is not None and parsedArgs.collection is None:
        parser.error("--database requires --collection")

    # Ask the filesystem instead of looking for "." in the name: paths such
    # as "./data" or "archive.v2" are directories that contain a dot.
    if os.path.isdir(parsedArgs.filePath):
        processDir(parsedArgs.verbose, parsedArgs.filePath, parsedArgs.count)


if __name__ == "__main__":
    main()
|
---|