source: main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py@ 33191

Last change on this file since 33191 was 33191, checked in by davidb, 5 years ago

Code tidy-up

File size: 2.4 KB
Line 
1import xlrd
2
3# Currently access worksheet by index num
4# Alternative is to do this by worksheet name
5worksheet_index_pos = 0
6## worksheet_name = "Archaeological reports"
7
8sanitized_headers = [ "Report Identifier", "Author", "Title", "Produced By", "Date", "HNZPT Region", "File No",
9 "Authority No", "Site No", "Relevant TLA's", "Record Type" ]
10
11def format_if_int(cell_value):
12 # Numbers from worksheet are represented as floating-point type
13 # This causes a problem when it is an 'int' as it then gets
14 # written out as a floating point number (with a '.0')
15 # => test for float and when there is no value after
16 # the decimal point, explicitly cast to 'int'
17 if type(cell_value) is float:
18 if cell_value == int(cell_value):
19 cell_value = int(cell_value)
20
21 return cell_value
22
23
24
25def getHeaderMappingToColNum(worksheet):
26
27 # For header line, build up hashmap of header-names to column number
28 # Assume header line is first row of spreadsheet
29
30 header_names_mapping = {}
31
32 row_i = 0
33 num_header_cols = worksheet.row_len(row_i)
34
35 found_header = False
36 for col_j in range(num_header_cols):
37 header_cell_value = worksheet.cell_value(row_i,col_j)
38 if header_cell_value == "Report Identifier":
39 found_header = True
40
41 header_names_mapping[header_cell_value] = col_j
42
43 if not found_header:
44 header_names_mapping = None
45
46 return header_names_mapping
47
48
49# The following can be used on an XSL file, but not currently an XSLX file
50# This is because, at the time of coding, xlrd does not support loading
51# in an XSLT workbook with formatting_info=True
52
53# https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
54def format_excelval(book, type, value, wanttupledate):
55 """ Clean up the incoming excel data """
56 ## Data Type Codes:
57 ## EMPTY 0
58 ## TEXT 1 a Unicode string
59 ## NUMBER 2 float
60 ## DATE 3 float
61 ## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
62 ## ERROR 5
63 returnrow = []
64 if type == 2: # TEXT
65 if value == int(value): value = int(value)
66 elif type == 3: # NUMBER
67 datetuple = xlrd.xldate_as_tuple(value, book.datemode)
68 value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
69 elif type == 5: # ERROR
70 value = xlrd.error_text_from_code[value]
71 return value
72
Note: See TracBrowser for help on using the repository browser.