source: main/trunk/model-sites-dev/atea/collect/digital-nz/prepare/dnz-search-language-mi--python3.py@ 34447

Last change on this file since 34447 was 34447, checked in by davidb, 4 years ago

Key files to 'prepare' an import folder by running a DNZ query for language=mi

  • Property svn:executable set to *
File size: 2.4 KB
Line 
1#!/usr/bin/env python3
2
3from __future__ import absolute_import
4from __future__ import division
5from __future__ import print_function
6
7from builtins import str
8
9import dnz.api
10import pprint
11import json
12import sys
13import re
14
# DigitalNZ API client; the placeholder below must be replaced with a real key.
DNZ = dnz.api.Dnz('<CHANGEME>')

# Other query forms, kept for reference:
#   result = dnz.search('kiwi tui')
#   result = dnz.search('kiwi tui', per_page=50, page=10,
#                       fields=['id', 'title', 'collection', 'content_partner'])
#   result = dnz.search(_or={'category': ['Videos', 'Images']},
#                       _and={'content_partner': ['Ministry for Culture and Heritage']})

# Restrict the search to records whose language field is 'mi'.
query_term = {'language': ['mi']}

# An initial request is made only to learn how many records match; the
# count drives the paging done further down the script.
results = DNZ.search(per_page=50, _and=query_term)
num_results = results.result_count
print("Number of matching records: " + str(num_results))
28
29
# Fetch every page of matching records and save each record as its own JSON
# file under import/, using a filename derived from the record's landing URL.
page_size = 50

# Ceiling division so that a final, partially filled page is still requested.
num_pages = (num_results + page_size - 1) // page_size

# The page number must be an integer: the previous `i / 50` produced floats
# (0.0, 1.0, ...) under true division.
# NOTE(review): pages are numbered from 1 here because the DigitalNZ API
# documents `page` as 1-based (default 1) - confirm against the API version
# in use.
for page in range(1, num_pages + 1):
    results = DNZ.search(_and=query_term, page=page, per_page=page_size)

    for rec in results.records:
        landing_url = rec["landing_url"]
        print("Landing URL: " + landing_url)

        # Turn the URL into a flat filename: ':' -> '-', '/' -> '|', '.' -> '_'.
        json_landing_filename = (
            landing_url.replace(':', '-').replace('/', '|').replace('.', '_')
            + ".json"
        )
        full_json_landing_filename = "import/" + json_landing_filename
        print("json_landing_filename:\t" + json_landing_filename)

        # Report the first identifier if the record has one. The truthiness
        # test covers both an empty list and a None value.
        dc_identifiers = rec["dc_identifier"]
        if dc_identifiers:
            print(" ID: " + dc_identifiers[0])
        else:
            print("**** No ID specified")

        # The import/ directory is expected to exist already.
        with open(full_json_landing_filename, 'w') as outfile:
            json.dump(rec, outfile)

        print("")
73
74# with open(json_filename, 'w') as outfile:
75# json.dump(data, outfile)
76
77#pprint.pprint(result.records)
78
79
80#with open('data.json', 'w') as outfile:
81# json.dump(data, outfile)
82
83
Note: See TracBrowser for help on using the repository browser.