Context Navigation

source: main/trunk/model-sites-dev/atea/collect/digital-nz/prepare/dnz-search-language-mi--python3.py@ 34459

Last change on this file since 34459 was 34459, checked in by davidb, 4 years ago
Now works with dnzkey.txt to separate out where the key is stored
Property svn:executable set to `*`
File size: 2.5 KB

Line
1	#!/usr/bin/env python3
2
3	from __future__ import absolute_import
4	from __future__ import division
5	from __future__ import print_function
6
7	from builtins import str
8
9	import dnz.api
10	import pprint
11	import json
12	import sys
13	import re
14
# The DNZ API key lives in dnzkey.txt (kept separate from the source);
# surrounding whitespace/newlines are stripped before use.
with open('dnzkey.txt', 'r') as key_file:
    raw_key = key_file.read()
dnzkey_str = raw_key.strip()

# Client object used for every search request made by this script.
DNZ = dnz.api.Dnz(dnzkey_str)
21
22
# Example calls kept for reference:
#   dnz.search('kiwi tui')
#   dnz.search('kiwi tui', per_page=50, page=10,
#              fields=['id', 'title', 'collection', 'content_partner'])
#   dnz.search(_or={'category': ['Videos', 'Images']},
#              _and={'content_partner': ['Ministry for Culture and Heritage']})

# Only match records whose 'language' field includes 'mi'.
query_term = {'language': ['mi']}

# First request: used to find out how many records match in total.
results = DNZ.search(_and=query_term, per_page=50)
num_results = results.result_count
print("Number of matching records: " + str(num_results))
32
33
# Walk every page of matching records and write each record out as its own
# JSON file under import/, named after the record's landing URL.
PER_PAGE = 50

# Ceiling division: number of pages needed to cover num_results records.
num_pages = (num_results + PER_PAGE - 1) // PER_PAGE

# Page numbers must be integers.  The earlier `i / 50` yielded floats (true
# division is in effect) counted from 0; the DigitalNZ API documents `page`
# as 1-based, so counting from 0 would most likely skip the final page.
# NOTE(review): confirm 1-based paging against the dnz client version in use.
for page in range(1, num_pages + 1):
    results = DNZ.search(_and=query_term, page=page, per_page=PER_PAGE)

    for rec in results.records:
        landing_url = rec["landing_url"]
        print("Landing URL: " + landing_url)

        # Turn the URL into a flat filename by spelling out the characters
        # that are awkward in filenames.  The replacement tokens contain
        # none of ':', '/' or '.', so the order of the replacements does not
        # affect the result.
        json_landing_filename = (
            landing_url.replace(':', '~COLON~')
            .replace('/', '~SLASH~')
            .replace('.', '~DOT~')
            + ".json"
        )

        full_json_landing_filename = "import/" + json_landing_filename
        print("json_landing_filename:\t" + json_landing_filename)

        # The identifier list may be missing, None or empty; treat all three
        # as "no ID" instead of crashing on len(None) / KeyError.
        dc_identifiers = rec.get("dc_identifier") or []

        if dc_identifiers:
            full_id = dc_identifiers[0]
            # str() guards against a non-string identifier value.
            print(" ID: " + str(full_id))
        else:
            print("**** No ID specified")

        # Persist the complete record; the import/ directory must already
        # exist relative to the current working directory.
        with open(full_json_landing_filename, 'w') as outfile:
            json.dump(rec, outfile)

        print("")
77
78	# with open(json_filename, 'w') as outfile:
79	# json.dump(data, outfile)
80
81	#pprint.pprint(result.records)
82
83
84	#with open('data.json', 'w') as outfile:
85	# json.dump(data, outfile)
86
87

Note: See TracBrowser for help on using the repository browser.

Download in other formats: