Context Navigation

source: main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py@ 33249

Last change on this file since 33249 was 33249, checked in by davidb, 5 years ago
Further checking needed when converting cell-value to be in unicode format
File size: 3.3 KB

Rev	Line
[33188]	1	import xlrd
	2
[33191]	3	# Currently access worksheet by index num
	4	# Alternative is to do this by worksheet name
	5	worksheet_index_pos = 0
	6	## worksheet_name = "Archaeological reports"
	7
[33222]	8	sanitized_headers = [ u"Report Identifier", u"Author", u"Title", u"Produced By", u"Date", u"HNZPT Region", u"File No",
	9	u"Authority No", u"Site No", u"Relevant TLA's", u"Record Type", u"Date Entered" ]
[33188]	10
[33222]	11	def make_unicode(value):
	12	value_unicode = value
	13
	14	if type(value) != unicode:
	15	value_str = value
[33249]	16	if type(value) != str:
	17
	18	# numbers in the spreadsheet default to 'float'
	19	# but if actually a whole number, want it be
	20	# type cast to an 'int' otherwise a '.0' ends
	21	# up on the end of the string version
	22	if type(value) is float:
	23	if value == int(value):
	24	value = int(value)
	25
[33222]	26	value_str = str(value)
	27
	28	value_unicode = value_str.decode('utf-8')
	29
	30	return value_unicode
	31
	32
	33	def cell_value_tidy_unicode(worksheet,row_i,col_j):
	34	value= worksheet.cell_value(row_i,col_j)
	35
	36	value_unicode = make_unicode(value)
	37	value_unicode_stripped = value_unicode.strip()
	38
	39	return value_unicode_stripped
	40
	41
[33188]	42	def format_if_int(cell_value):
	43	# Numbers from worksheet are represented as floating-point type
	44	# This causes a problem when it is an 'int' as it then gets
	45	# written out as a floating point number (with a '.0')
	46	# => test for float and when there is no value after
	47	# the decimal point, explicitly cast to 'int'
	48	if type(cell_value) is float:
	49	if cell_value == int(cell_value):
	50	cell_value = int(cell_value)
	51
	52	return cell_value
	53
	54
	55
[33202]	56	header_row_pos = 0
	57
[33188]	58	def getHeaderMappingToColNum(worksheet):
	59
	60	# For header line, build up hashmap of header-names to column number
	61	# Assume header line is first row of spreadsheet
	62
	63	header_names_mapping = {}
	64
[33202]	65	num_header_cols = worksheet.row_len(header_row_pos)
[33188]	66
	67	found_header = False
	68	for col_j in range(num_header_cols):
[33222]	69	header_cell_value = cell_value_tidy_unicode(worksheet,header_row_pos,col_j)
	70	if header_cell_value == u"Report Identifier":
[33188]	71	found_header = True
	72
[33202]	73	if header_cell_value and header_cell_value.strip():
	74	header_names_mapping[header_cell_value] = col_j
[33188]	75
	76	if not found_header:
	77	header_names_mapping = None
	78
	79	return header_names_mapping
	80
	81
	82	# The following can be used on an XSL file, but not currently an XSLX file
	83	# This is because, at the time of coding, xlrd does not support loading
	84	# in an XSLT workbook with formatting_info=True
	85
	86	# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
	87	def format_excelval(book, type, value, wanttupledate):
	88	""" Clean up the incoming excel data """
	89	## Data Type Codes:
	90	## EMPTY 0
	91	## TEXT 1 a Unicode string
	92	## NUMBER 2 float
	93	## DATE 3 float
	94	## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
	95	## ERROR 5
	96	returnrow = []
	97	if type == 2: # TEXT
	98	if value == int(value): value = int(value)
	99	elif type == 3: # NUMBER
	100	datetuple = xlrd.xldate_as_tuple(value, book.datemode)
	101	value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
	102	elif type == 5: # ERROR
	103	value = xlrd.error_text_from_code[value]
	104	return value
	105

Note: See TracBrowser for help on using the repository browser.

Download in other formats: