1 | import xlrd
|
---|
2 |
|
---|
3 | # Currently access worksheet by index num
|
---|
4 | # Alternative is to do this by worksheet name
|
---|
5 | worksheet_index_pos = 0
|
---|
6 | ## worksheet_name = "Archaeological reports"
|
---|
7 |
|
---|
8 | sanitized_headers = [ "Report Identifier", "Author", "Title", "Produced By", "Date", "HNZPT Region", "File No",
|
---|
9 | "Authority No", "Site No", "Relevant TLA's", "Record Type" ]
|
---|
10 |
|
---|
11 | def format_if_int(cell_value):
|
---|
12 | # Numbers from worksheet are represented as floating-point type
|
---|
13 | # This causes a problem when it is an 'int' as it then gets
|
---|
14 | # written out as a floating point number (with a '.0')
|
---|
15 | # => test for float and when there is no value after
|
---|
16 | # the decimal point, explicitly cast to 'int'
|
---|
17 | if type(cell_value) is float:
|
---|
18 | if cell_value == int(cell_value):
|
---|
19 | cell_value = int(cell_value)
|
---|
20 |
|
---|
21 | return cell_value
|
---|
22 |
|
---|
23 |
|
---|
24 |
|
---|
25 | def getHeaderMappingToColNum(worksheet):
|
---|
26 |
|
---|
27 | # For header line, build up hashmap of header-names to column number
|
---|
28 | # Assume header line is first row of spreadsheet
|
---|
29 |
|
---|
30 | header_names_mapping = {}
|
---|
31 |
|
---|
32 | row_i = 0
|
---|
33 | num_header_cols = worksheet.row_len(row_i)
|
---|
34 |
|
---|
35 | found_header = False
|
---|
36 | for col_j in range(num_header_cols):
|
---|
37 | header_cell_value = worksheet.cell_value(row_i,col_j)
|
---|
38 | if header_cell_value == "Report Identifier":
|
---|
39 | found_header = True
|
---|
40 |
|
---|
41 | header_names_mapping[header_cell_value] = col_j
|
---|
42 |
|
---|
43 | if not found_header:
|
---|
44 | header_names_mapping = None
|
---|
45 |
|
---|
46 | return header_names_mapping
|
---|
47 |
|
---|
48 |
|
---|
49 | # The following can be used on an XSL file, but not currently an XSLX file
|
---|
50 | # This is because, at the time of coding, xlrd does not support loading
|
---|
51 | # in an XSLT workbook with formatting_info=True
|
---|
52 |
|
---|
53 | # https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
|
---|
54 | def format_excelval(book, type, value, wanttupledate):
|
---|
55 | """ Clean up the incoming excel data """
|
---|
56 | ## Data Type Codes:
|
---|
57 | ## EMPTY 0
|
---|
58 | ## TEXT 1 a Unicode string
|
---|
59 | ## NUMBER 2 float
|
---|
60 | ## DATE 3 float
|
---|
61 | ## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
|
---|
62 | ## ERROR 5
|
---|
63 | returnrow = []
|
---|
64 | if type == 2: # TEXT
|
---|
65 | if value == int(value): value = int(value)
|
---|
66 | elif type == 3: # NUMBER
|
---|
67 | datetuple = xlrd.xldate_as_tuple(value, book.datemode)
|
---|
68 | value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
|
---|
69 | elif type == 5: # ERROR
|
---|
70 | value = xlrd.error_text_from_code[value]
|
---|
71 | return value
|
---|
72 |
|
---|