source: other-projects/metadata-encoding/py/xRefToMongo.py@ 38488

Last change on this file since 38488 was 38488, checked in by jc550, 6 months ago

add part of convertFile function

File size: 2.0 KB
Line 
1#program to convert xref metadata files into compatible files for mongo database
2
3import pymongo, gzip, argparse, json
4
# Value stored as the "_id" of every record that convertFile builds.
# NOTE(review): nothing in the code visible here ever changes it, so every
# record gets the same "_id" -- confirm the intended numbering scheme.
numFile = 0


# convert std xref metadata file
def convertFile(path):
    """Read a gzipped xref metadata JSON file and build one record per item.

    The file at ``path`` must be gzip-compressed UTF-8 JSON whose top-level
    object has an "items" list. For each item a dict is assembled holding
    "_id" (the module-level ``numFile``) plus, when present, "DOI" and the
    first entry of "title" and "subtitle".

    NOTE: the conversion is still work in progress -- the assembled dicts are
    not yet written or returned, so this function currently returns None.

    Raises:
        OSError: if ``path`` cannot be opened or is not valid gzip data.
        json.JSONDecodeError: if the decompressed content is not valid JSON.
        KeyError: if the top-level object has no "items" key.
    """
    # Text mode ("rt") is required here: in binary mode gzip.open() rejects
    # the encoding argument with a ValueError.
    with gzip.open(path, "rt", encoding="utf-8") as gzippedFile:
        jsonDictionary = json.load(gzippedFile)

    for item in jsonDictionary["items"]:
        dictToDump = {"_id": numFile}
        if "DOI" in item:
            dictToDump["DOI"] = item["DOI"]
        # Only the first entry of these fields is kept; a missing or empty
        # value is skipped instead of failing on the [0] lookup.
        for key in ("title", "subtitle"):
            if item.get(key):
                dictToDump[key] = item[key][0]
20
21
22
def convertFileVerbose(path):
    """Print a progress line naming the file that is being converted.

    NOTE(review): this variant only prints the message; it does not perform
    any conversion yet.
    """
    # The placeholder has to be filled with str.format(); passing path as a
    # second print() argument emitted the literal "{0}" followed by the path.
    print("Converting {0}...".format(path))
25
def processDir(verbose, path, count):
    """Choose the per-file conversion routine for a directory run.

    A truthy ``verbose`` selects convertFileVerbose, otherwise convertFile.
    Only the selection is implemented so far: the chosen function is not
    called, and ``path`` and ``count`` are accepted but not used yet.
    """
    # pick the routine matching the requested verbosity
    converter = convertFileVerbose if verbose else convertFile
30
31# main entry point and argument parser
def main(argv=None):
    """Parse the command line and dispatch directory inputs to processDir.

    Args:
        argv: optional list of argument strings; when None (the default)
            argparse reads them from sys.argv[1:].

    Exits through argparse (SystemExit) on invalid arguments. Inputs that are
    not an existing directory are parsed but not processed yet.
    """
    # Imported locally so this function does not depend on the module-level
    # import line being changed.
    import os

    parser = argparse.ArgumentParser(
        prog="xRefToMongo",
        description="convert xRef Metadata to compatible file for Mongo",
    )
    parser.add_argument("filePath", help="path to file or directory")
    # type=int so a non-numeric count is rejected at parse time
    parser.add_argument("-c", "--count", dest="count", type=int, help="number of file to convert if working with full metadata archive")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="enable verbose output")
    parser.add_argument("-db", "--database", dest="database", help="export directly to mongodb database (requires collection)")
    # "-c" already belongs to --count; registering it a second time makes
    # argparse raise a conflicting-option error, so --collection uses "-col".
    parser.add_argument("-col", "--collection", dest="collection", help="collection in specified database")

    # when no input for count, equals None
    parsedArgs = parser.parse_args(argv)

    # Ask the filesystem whether the path is a directory; checking for a "."
    # in the string misclassifies paths such as "./archive".
    if os.path.isdir(parsedArgs.filePath):
        processDir(parsedArgs.verbose, parsedArgs.filePath, parsedArgs.count)


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.