Context Navigation

get_unicode_blocks.py@ 38791

Last change on this file since 38791 was 38791, checked in by jc550, 4 months ago
add comments adding context to functions that require it
File size: 1.4 KB

Line
1	#!/usr/bin/env PYTHONIOENCODING=utf-8 python
2	# encoding: utf-8
3
4	# This is code from Chris Adams, source https://gist.github.com/acdha/49a610089c2798db6fe2
5
6	from __future__ import absolute_import, print_function, unicode_literals
7
8	import os
9	import re
10
11	import requests
12
13
def get_block_for_codepoint(cp):
    """Look up which Unicode block a numeric codepoint belongs to.

    The module-level ``UNICODE_BLOCKS`` table is scanned in order and the
    name of the first range whose inclusive bounds contain ``cp`` is
    returned.  If no range contains it, the string ``'No_Block'`` is
    returned instead.
    """
    matching_names = (
        name
        for low, high, name in UNICODE_BLOCKS
        if low <= cp <= high
    )
    # next() stops at the first hit, so the table is not scanned further.
    return next(matching_names, 'No_Block')
22
23
def load_unicode_blocks_from_file(f):
    """Parse the contents of a Unicode ``Blocks.txt`` stream.

    Args:
        f: A readable file-like object.  It may be opened in binary mode
            (the contents are decoded as UTF-8) or in text mode (the
            contents are used as-is).

    Returns:
        A list of ``(start, end, block_name)`` tuples in the order they
        appear in the stream.  ``start`` and ``end`` are the inclusive
        codepoint bounds as integers.  Entries named ``No_Block`` are
        omitted.
    """
    file_contents = f.read()
    # Only byte strings need decoding.  A text-mode stream already yields
    # text, and on Python 3 ``str`` has no ``decode`` method.
    if isinstance(file_contents, bytes):
        file_contents = file_contents.decode('utf-8')

    # Data lines look like ``0000..007F; Basic Latin``.  The pattern is not
    # anchored to the start of a line, so a range that appears inside a
    # comment line matches too; the ``No_Block`` placeholder is filtered out.
    return [
        (int(start, 16), int(end, 16), block_name)
        for start, end, block_name in re.findall(
            r'([0-9A-F]+)\.\.([0-9A-F]+);\ (\S.*\S)', file_contents)
        if block_name != 'No_Block'
    ]
35
36
def load_unicode_blocks(block_filename):
    """Load the Unicode block table, downloading the data file if needed.

    If ``block_filename`` does not exist, ``Blocks.txt`` is fetched from
    unicode.org and saved under that name before being parsed.

    Args:
        block_filename: Path of the local copy of ``Blocks.txt``.

    Returns:
        The list of ``(start, end, block_name)`` tuples produced by
        ``load_unicode_blocks_from_file``.

    Raises:
        requests.RequestException: If the download fails, times out, or
            returns an HTTP error status.
    """
    if not os.path.exists(block_filename):
        print('Unicode block file %s does not exist. Downloading…' % block_filename)
        # HTTPS protects the table from tampering in transit; the timeout
        # keeps an unresponsive server from hanging the caller forever.
        r = requests.get(
            'https://www.unicode.org/Public/UNIDATA/Blocks.txt', timeout=30)
        r.raise_for_status()

        # The request is not streamed, so the whole body is already in
        # memory.  Write it in one call: iter_content() defaults to 1-byte
        # chunks, which would mean one write per byte.
        with open(block_filename, 'wb') as f:
            f.write(r.content)

    # Open in binary mode; the parser does the UTF-8 decoding.
    with open(block_filename, 'rb') as f:
        return load_unicode_blocks_from_file(f)
51
# Block table scanned by get_block_for_codepoint(). It is built once, when this
# module is imported: the relative filename is resolved against the current
# working directory, and load_unicode_blocks() downloads the file if it is missing.
52	UNICODE_BLOCKS = load_unicode_blocks('UNIDATA-Blocks.txt')

Note: See TracBrowser for help on using the repository browser.