Context Navigation

source: main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py@ 33202

Last change on this file since 33202 was 33202, checked in by davidb, 5 years ago
Bound and Unbound CSV files changed to pring out all non-empty header columns
File size: 2.5 KB

Line
1	import xlrd
2
3	# Currently access worksheet by index num
4	# Alternative is to do this by worksheet name
5	worksheet_index_pos = 0
6	## worksheet_name = "Archaeological reports"
7
8	sanitized_headers = [ "Report Identifier", "Author", "Title", "Produced By", "Date", "HNZPT Region", "File No",
9	"Authority No", "Site No", "Relevant TLA's", "Record Type" ]
10
11	def format_if_int(cell_value):
12	# Numbers from worksheet are represented as floating-point type
13	# This causes a problem when it is an 'int' as it then gets
14	# written out as a floating point number (with a '.0')
15	# => test for float and when there is no value after
16	# the decimal point, explicitly cast to 'int'
17	if type(cell_value) is float:
18	if cell_value == int(cell_value):
19	cell_value = int(cell_value)
20
21	return cell_value
22
23
24
25	header_row_pos = 0
26
27	def getHeaderMappingToColNum(worksheet):
28
29	# For header line, build up hashmap of header-names to column number
30	# Assume header line is first row of spreadsheet
31
32	header_names_mapping = {}
33
34	num_header_cols = worksheet.row_len(header_row_pos)
35
36	found_header = False
37	for col_j in range(num_header_cols):
38	header_cell_value = worksheet.cell_value(header_row_pos,col_j)
39	if header_cell_value == "Report Identifier":
40	found_header = True
41
42	if header_cell_value and header_cell_value.strip():
43	header_names_mapping[header_cell_value] = col_j
44
45	if not found_header:
46	header_names_mapping = None
47
48	return header_names_mapping
49
50
51	# The following can be used on an XSL file, but not currently an XSLX file
52	# This is because, at the time of coding, xlrd does not support loading
53	# in an XSLT workbook with formatting_info=True
54
55	# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
56	def format_excelval(book, type, value, wanttupledate):
57	""" Clean up the incoming excel data """
58	## Data Type Codes:
59	## EMPTY 0
60	## TEXT 1 a Unicode string
61	## NUMBER 2 float
62	## DATE 3 float
63	## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
64	## ERROR 5
65	returnrow = []
66	if type == 2: # TEXT
67	if value == int(value): value = int(value)
68	elif type == 3: # NUMBER
69	datetuple = xlrd.xldate_as_tuple(value, book.datemode)
70	value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
71	elif type == 5: # ERROR
72	value = xlrd.error_text_from_code[value]
73	return value
74

Note: See TracBrowser for help on using the repository browser.

Download in other formats: