#!/usr/bin/env python3 from __future__ import absolute_import from __future__ import division from __future__ import print_function from builtins import str import dnz.api import pprint import json import sys import re # DNZ key DNZ = dnz.api.Dnz('') #result = dnz.search('kiwi tui') #result = dnz.search('kiwi tui', per_page=50, page=10, fields=['id', 'title', 'collection', 'content_partner']) #result = dnz.search(_or={'category':['Videos', 'Images']}, _and={'content_partner':['Ministry for Culture and Heritage']}) query_term={'language':['mi']} results = DNZ.search(_and=query_term, per_page=50) num_results=results.result_count print("Number of matching records: " + str(results.result_count)) for i in range(0,num_results,50): page = i / 50 results = DNZ.search(_and=query_term, page=page, per_page=50) for rec in results.records: # pprint.pprint(rec) landing_url = rec["landing_url"] print("Landing URL: " + rec["landing_url"]) ### !!! landing_url = unicode(landing_url).encode('utf8') ##landing_url = str(landing_url).encode('utf8') #json_landing_filename = re.sub('[:/.]', '-', landing_url) + ".json" json_landing_filename1 = re.sub('[:]', '~COLON~', landing_url) json_landing_filename2 = re.sub('[/]', '~SLASH~', json_landing_filename1) json_landing_filename = re.sub('[.]', '~DOT~', json_landing_filename2) + ".json" full_json_landing_filename = "import/" + json_landing_filename print("json_landing_filename:\t" + json_landing_filename) dc_identifiers = rec["dc_identifier"] #json.dump(rec, sys.stdout) if (len(dc_identifiers)>0): ### !!! full_id = unicode(dc_identifiers[0]).encode('utf8') full_id = dc_identifiers[0] print(" ID: " + full_id) # tail_id = full_id.replace('http://hdl.handle.net/','') # json_filename = re.sub('[:/.]', '-', tail_id) + ".json" # print json_filename else: print("**** No ID specified") with open(full_json_landing_filename, 'w') as outfile: json.dump(rec, outfile) print("") # with open(json_filename, 'w') as outfile: # json.dump(data, outfile) #pprint.pprint(result.records) #with open('data.json', 'w') as outfile: # json.dump(data, outfile)