Context Navigation

source: main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py@ 33248

Last change on this file since 33248 was 33222, checked in by davidb, 5 years ago
More careful working with unicode/utf8
File size: 3.0 KB

Line
1	import xlrd
2
3	# Currently access worksheet by index num
4	# Alternative is to do this by worksheet name
5	worksheet_index_pos = 0
6	## worksheet_name = "Archaeological reports"
7
8	sanitized_headers = [ u"Report Identifier", u"Author", u"Title", u"Produced By", u"Date", u"HNZPT Region", u"File No",
9	u"Authority No", u"Site No", u"Relevant TLA's", u"Record Type", u"Date Entered" ]
10
11	def make_unicode(value):
12	value_unicode = value
13
14	if type(value) != unicode:
15	value_str = value
16	if type(value_str) != str:
17	value_str = str(value)
18
19	value_unicode = value_str.decode('utf-8')
20
21	return value_unicode
22
23
24	def cell_value_tidy_unicode(worksheet,row_i,col_j):
25	value= worksheet.cell_value(row_i,col_j)
26
27	value_unicode = make_unicode(value)
28	value_unicode_stripped = value_unicode.strip()
29
30	return value_unicode_stripped
31
32
33	def format_if_int(cell_value):
34	# Numbers from worksheet are represented as floating-point type
35	# This causes a problem when it is an 'int' as it then gets
36	# written out as a floating point number (with a '.0')
37	# => test for float and when there is no value after
38	# the decimal point, explicitly cast to 'int'
39	if type(cell_value) is float:
40	if cell_value == int(cell_value):
41	cell_value = int(cell_value)
42
43	return cell_value
44
45
46
47	header_row_pos = 0
48
49	def getHeaderMappingToColNum(worksheet):
50
51	# For header line, build up hashmap of header-names to column number
52	# Assume header line is first row of spreadsheet
53
54	header_names_mapping = {}
55
56	num_header_cols = worksheet.row_len(header_row_pos)
57
58	found_header = False
59	for col_j in range(num_header_cols):
60	header_cell_value = cell_value_tidy_unicode(worksheet,header_row_pos,col_j)
61	if header_cell_value == u"Report Identifier":
62	found_header = True
63
64	if header_cell_value and header_cell_value.strip():
65	header_names_mapping[header_cell_value] = col_j
66
67	if not found_header:
68	header_names_mapping = None
69
70	return header_names_mapping
71
72
73	# The following can be used on an XSL file, but not currently an XSLX file
74	# This is because, at the time of coding, xlrd does not support loading
75	# in an XSLT workbook with formatting_info=True
76
77	# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
78	def format_excelval(book, type, value, wanttupledate):
79	""" Clean up the incoming excel data """
80	## Data Type Codes:
81	## EMPTY 0
82	## TEXT 1 a Unicode string
83	## NUMBER 2 float
84	## DATE 3 float
85	## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
86	## ERROR 5
87	returnrow = []
88	if type == 2: # TEXT
89	if value == int(value): value = int(value)
90	elif type == 3: # NUMBER
91	datetuple = xlrd.xldate_as_tuple(value, book.datemode)
92	value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
93	elif type == 5: # ERROR
94	value = xlrd.error_text_from_code[value]
95	return value
96

Note: See TracBrowser for help on using the repository browser.

Download in other formats: