source: main/trunk/model-sites-dev/atea/collect/digital-nz/prepare/dnz-search-language-mi--python3.py@ 34460

Last change on this file since 34460 was 34460, checked in by davidb, 4 years ago

Tidy up of code and notes

  • Property svn:executable set to *
File size: 2.4 KB
Line 
1#!/usr/bin/env python3
2
3from __future__ import absolute_import
4from __future__ import division
5from __future__ import print_function
6
7from builtins import str
8
9import os
10import sys
11import re
12import json
13#import pprint
14
15import dnz.api
16
17
# Directory (relative to the current working directory) that receives
# one JSON file per downloaded record.
output_dir = "downloads"

# DNZ key: read from 'dnzkey.txt' in the current working directory,
# with surrounding whitespace (e.g. a trailing newline) removed.
with open('dnzkey.txt', 'r') as key_file:
    raw_key = key_file.read()
dnzkey_str = raw_key.strip()

# print("dnzkey_str = '" + dnzkey_str + "'")

# API client used for every search issued by this script.
DNZ = dnz.api.Dnz(dnzkey_str)
26
27
28#result = dnz.search('kiwi tui')
29#result = dnz.search('kiwi tui', per_page=50, page=10, fields=['id', 'title', 'collection', 'content_partner'])
30#result = dnz.search(_or={'category':['Videos', 'Images']}, _and={'content_partner':['Ministry for Culture and Heritage']})
31
# Restrict the search to records whose 'language' field is 'mi'
# (the ISO 639-1 code for Maori).
query_term = {'language': ['mi']}

# Number of records requested per API call.
per_page = 50


def _landing_url_to_filename(landing_url):
    """Return a flat JSON filename derived from a record's landing URL.

    ':', '/' and '.' are replaced by the tokens ~COLON~, ~SLASH~ and ~DOT~
    so the whole URL can be used as a single filename, and '.json' is
    appended.
    """
    flattened = landing_url.replace(':', '~COLON~')
    flattened = flattened.replace('/', '~SLASH~')
    flattened = flattened.replace('.', '~DOT~')
    return flattened + ".json"


# Initial query: only used to find out how many records match.
results = DNZ.search(_and=query_term, per_page=per_page)
num_results = results.result_count
print("Number of matching records: " + str(num_results))

# Make sure the output directory exists before the first record is written
# (isdir/makedirs rather than exist_ok= to stay friendly to older Pythons).
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Ceiling division, kept as an integer: 'from __future__ import division'
# makes '/' produce floats, so '//' is required for a usable page number.
num_pages = (num_results + per_page - 1) // per_page

# API pages are numbered from 1, so walk 1..num_pages inclusive.
for page in range(1, num_pages + 1):
    results = DNZ.search(_and=query_term, page=page, per_page=per_page)

    for rec in results.records:
        landing_url = rec["landing_url"]
        print("Landing URL: " + landing_url)

        json_landing_filename = _landing_url_to_filename(landing_url)
        full_json_landing_filename = os.path.join(output_dir, json_landing_filename)
        print("json_landing_filename:\t" + json_landing_filename)

        # A missing or null 'dc_identifier' is treated the same as an empty list.
        dc_identifiers = rec.get("dc_identifier") or []

        if dc_identifiers:
            # Only the first identifier is reported.
            full_id = dc_identifiers[0]
            print(" ID: " + full_id)
        else:
            print("**** No ID specified")

        # The complete record is saved regardless of whether it had an ID.
        with open(full_json_landing_filename, 'w') as outfile:
            json.dump(rec, outfile)

        print("")
82
83#pprint.pprint(result.records)
84
85
86
87
Note: See TracBrowser for help on using the repository browser.