[34447] | 1 | #!/usr/bin/env python3
|
---|
| 2 |
|
---|
| 3 | from __future__ import absolute_import
|
---|
| 4 | from __future__ import division
|
---|
| 5 | from __future__ import print_function
|
---|
| 6 |
|
---|
| 7 | from builtins import str
|
---|
| 8 |
|
---|
| 9 | import dnz.api
|
---|
| 10 | import pprint
|
---|
| 11 | import json
|
---|
| 12 | import sys
|
---|
| 13 | import re
|
---|
| 14 |
|
---|
| 15 | # DNZ key
|
---|
| 16 | DNZ = dnz.api.Dnz('<CHANGEME>')
|
---|
| 17 |
|
---|
| 18 | #result = dnz.search('kiwi tui')
|
---|
| 19 | #result = dnz.search('kiwi tui', per_page=50, page=10, fields=['id', 'title', 'collection', 'content_partner'])
|
---|
| 20 | #result = dnz.search(_or={'category':['Videos', 'Images']}, _and={'content_partner':['Ministry for Culture and Heritage']})
|
---|
| 21 |
|
---|
# Number of records requested per search call.
PER_PAGE = 50

# Search filter passed to DNZ.search as its `_and` argument.
query_term = {'language': ['mi']}

# Characters rewritten when a landing URL is turned into a file name, and the
# readable token that stands in for each of them.
FILENAME_ESCAPES = {':': '~COLON~', '/': '~SLASH~', '.': '~DOT~'}
FILENAME_ESCAPE_RE = re.compile(r'[:/.]')

# Directory prefix for the per-record JSON files (must already exist).
OUTPUT_PREFIX = "import/"


def landing_filename(landing_url):
    """Return the JSON file name used to store the record for *landing_url*.

    Every ':', '/' and '.' in the URL is replaced by '~COLON~', '~SLASH~' and
    '~DOT~' respectively, and '.json' is appended.  None of the replacement
    tokens contains an escaped character, so a single pass gives the same
    result as substituting one character at a time.
    """
    escaped = FILENAME_ESCAPE_RE.sub(
        lambda match: FILENAME_ESCAPES[match.group(0)], landing_url)
    return escaped + ".json"


def page_numbers(num_results, per_page=PER_PAGE):
    """Return the 1-based page numbers needed to cover *num_results* records.

    The result is always made of integers: the module enables true division,
    so a plain ``i / per_page`` would produce floats, and counting from zero
    would request a non-existent page 0 and never reach the final page.
    """
    num_pages = (num_results + per_page - 1) // per_page
    return range(1, num_pages + 1)


def save_record(rec):
    """Report one search record and dump it as JSON under OUTPUT_PREFIX.

    *rec* is indexed by string keys and serialised with ``json.dump``; it must
    provide "landing_url".  A missing or empty "dc_identifier" is reported
    rather than treated as an error.
    """
    landing_url = rec["landing_url"]
    print("Landing URL: " + landing_url)

    json_landing_filename = landing_filename(landing_url)
    full_json_landing_filename = OUTPUT_PREFIX + json_landing_filename
    print("json_landing_filename:\t" + json_landing_filename)

    # The field can be absent or None; treat both like an empty list.
    dc_identifiers = rec.get("dc_identifier") or []
    if dc_identifiers:
        print(" ID: " + dc_identifiers[0])
    else:
        print("**** No ID specified")

    with open(full_json_landing_filename, 'w') as outfile:
        json.dump(rec, outfile)

    print("")


def main():
    """Fetch every record matching query_term and save each one as JSON."""
    # The first request is only used to learn how many records match.
    num_results = DNZ.search(_and=query_term, per_page=PER_PAGE).result_count
    print("Number of matching records: " + str(num_results))

    for page in page_numbers(num_results):
        results = DNZ.search(_and=query_term, page=page, per_page=PER_PAGE)
        for rec in results.records:
            save_record(rec)


if __name__ == "__main__":
    main()
| 72 |
|
---|
| 73 | # with open(json_filename, 'w') as outfile:
|
---|
| 74 | # json.dump(data, outfile)
|
---|
| 75 |
|
---|
| 76 | #pprint.pprint(result.records)
|
---|
| 77 |
|
---|
| 78 |
|
---|
| 79 | #with open('data.json', 'w') as outfile:
|
---|
| 80 | # json.dump(data, outfile)
|
---|
| 81 |
|
---|
| 82 |
|
---|