1 | #!/usr/bin/env python3
|
---|
2 |
|
---|
3 | from __future__ import absolute_import
|
---|
4 | from __future__ import division
|
---|
5 | from __future__ import print_function
|
---|
6 |
|
---|
7 | from builtins import str
|
---|
8 |
|
---|
9 | import os
|
---|
10 | import sys
|
---|
11 | import re
|
---|
12 | import json
|
---|
13 | #import pprint
|
---|
14 |
|
---|
15 | import dnz.api
|
---|
16 |
|
---|
17 |
|
---|
# Directory (relative to the current working directory) that receives one
# JSON file per harvested record.
output_dir = "downloads"

# Number of records requested per API call.
PER_PAGE = 50

# Characters replaced when turning a landing URL into a filename, paired with
# the readable placeholder that stands in for each of them.
_FILENAME_ESCAPES = (
    (":", "~COLON~"),
    ("/", "~SLASH~"),
    (".", "~DOT~"),
)


def landing_url_to_filename(landing_url):
    """Return the JSON filename under which the record for *landing_url* is stored.

    Every ':', '/' and '.' in the URL is replaced by '~COLON~', '~SLASH~' and
    '~DOT~' respectively, and '.json' is appended.
    """
    name = landing_url
    for char, placeholder in _FILENAME_ESCAPES:
        name = name.replace(char, placeholder)
    return name + ".json"


def page_numbers(num_results, per_page=PER_PAGE):
    """Return the integer page numbers needed to cover *num_results* records.

    Pages are numbered from 1 and the last, possibly partial, page is
    included.  Floor division keeps the values integral even though the file
    enables true division via ``from __future__ import division``.

    NOTE(review): this assumes the DigitalNZ API numbers pages from 1 (its
    documented default is page=1) -- confirm against the API reference.
    """
    num_pages = (num_results + per_page - 1) // per_page
    return range(1, num_pages + 1)


def read_api_key(path="dnzkey.txt"):
    """Return the contents of the key file at *path*, stripped of whitespace."""
    with open(path, "r") as key_file:
        return key_file.read().strip()


def save_record(rec, directory):
    """Write *rec* as JSON into *directory* and return the path written.

    The filename is derived from ``rec["landing_url"]``.  The first entry of
    ``rec["dc_identifier"]`` is printed when present; a missing, ``None`` or
    empty identifier list is reported instead of raising.
    """
    landing_url = rec["landing_url"]
    print("Landing URL: " + landing_url)

    json_landing_filename = landing_url_to_filename(landing_url)
    full_json_landing_filename = os.path.join(directory, json_landing_filename)
    print("json_landing_filename:\t" + json_landing_filename)

    # Treat an absent or null identifier list the same as an empty one.
    dc_identifiers = rec.get("dc_identifier") or []
    if dc_identifiers:
        full_id = dc_identifiers[0]
        print(" ID: " + full_id)
    else:
        print("**** No ID specified")

    with open(full_json_landing_filename, 'w') as outfile:
        json.dump(rec, outfile)

    print("")
    return full_json_landing_filename


def main():
    """Fetch every record matching the query and store each one as a JSON file.

    Other query forms accepted by ``Dnz.search`` (kept from the original
    script as usage examples):
        search('kiwi tui')
        search('kiwi tui', per_page=50, page=10,
               fields=['id', 'title', 'collection', 'content_partner'])
        search(_or={'category': ['Videos', 'Images']},
               _and={'content_partner': ['Ministry for Culture and Heritage']})
    """
    # Create the output directory up front so the per-record open() cannot
    # fail merely because the directory is missing.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    dnzkey_str = read_api_key()
    DNZ = dnz.api.Dnz(dnzkey_str)

    query_term = {'language': ['mi']}

    # The first request is only used to learn how many records match.
    results = DNZ.search(_and=query_term, per_page=PER_PAGE)
    num_results = results.result_count
    print("Number of matching records: " + str(num_results))

    for page in page_numbers(num_results):
        results = DNZ.search(_and=query_term, page=page, per_page=PER_PAGE)
        for rec in results.records:
            save_record(rec, output_dir)


if __name__ == "__main__":
    main()