[33188] | 1 | import xlrd
|
---|
| 2 |
|
---|
[33191] | 3 | # Currently access worksheet by index num
|
---|
| 4 | # Alternative is to do this by worksheet name
|
---|
| 5 | worksheet_index_pos = 0
|
---|
| 6 | ## worksheet_name = "Archaeological reports"
|
---|
| 7 |
|
---|
[33222] | 8 | sanitized_headers = [ u"Report Identifier", u"Author", u"Title", u"Produced By", u"Date", u"HNZPT Region", u"File No",
|
---|
| 9 | u"Authority No", u"Site No", u"Relevant TLA's", u"Record Type", u"Date Entered" ]
|
---|
[33188] | 10 |
|
---|
[33222] | 11 | def make_unicode(value):
|
---|
| 12 | value_unicode = value
|
---|
| 13 |
|
---|
| 14 | if type(value) != unicode:
|
---|
| 15 | value_str = value
|
---|
[33249] | 16 | if type(value) != str:
|
---|
| 17 |
|
---|
| 18 | # numbers in the spreadsheet default to 'float'
|
---|
| 19 | # but if actually a whole number, want it be
|
---|
| 20 | # type cast to an 'int' otherwise a '.0' ends
|
---|
| 21 | # up on the end of the string version
|
---|
| 22 | if type(value) is float:
|
---|
| 23 | if value == int(value):
|
---|
| 24 | value = int(value)
|
---|
| 25 |
|
---|
[33222] | 26 | value_str = str(value)
|
---|
| 27 |
|
---|
| 28 | value_unicode = value_str.decode('utf-8')
|
---|
| 29 |
|
---|
| 30 | return value_unicode
|
---|
| 31 |
|
---|
| 32 |
|
---|
| 33 | def cell_value_tidy_unicode(worksheet,row_i,col_j):
|
---|
| 34 | value= worksheet.cell_value(row_i,col_j)
|
---|
| 35 |
|
---|
| 36 | value_unicode = make_unicode(value)
|
---|
| 37 | value_unicode_stripped = value_unicode.strip()
|
---|
| 38 |
|
---|
| 39 | return value_unicode_stripped
|
---|
| 40 |
|
---|
| 41 |
|
---|
[33188] | 42 | def format_if_int(cell_value):
|
---|
| 43 | # Numbers from worksheet are represented as floating-point type
|
---|
| 44 | # This causes a problem when it is an 'int' as it then gets
|
---|
| 45 | # written out as a floating point number (with a '.0')
|
---|
| 46 | # => test for float and when there is no value after
|
---|
| 47 | # the decimal point, explicitly cast to 'int'
|
---|
| 48 | if type(cell_value) is float:
|
---|
| 49 | if cell_value == int(cell_value):
|
---|
| 50 | cell_value = int(cell_value)
|
---|
| 51 |
|
---|
| 52 | return cell_value
|
---|
| 53 |
|
---|
| 54 |
|
---|
| 55 |
|
---|
[33202] | 56 | header_row_pos = 0
|
---|
| 57 |
|
---|
[33188] | 58 | def getHeaderMappingToColNum(worksheet):
|
---|
| 59 |
|
---|
| 60 | # For header line, build up hashmap of header-names to column number
|
---|
| 61 | # Assume header line is first row of spreadsheet
|
---|
| 62 |
|
---|
| 63 | header_names_mapping = {}
|
---|
| 64 |
|
---|
[33202] | 65 | num_header_cols = worksheet.row_len(header_row_pos)
|
---|
[33188] | 66 |
|
---|
| 67 | found_header = False
|
---|
| 68 | for col_j in range(num_header_cols):
|
---|
[33222] | 69 | header_cell_value = cell_value_tidy_unicode(worksheet,header_row_pos,col_j)
|
---|
| 70 | if header_cell_value == u"Report Identifier":
|
---|
[33188] | 71 | found_header = True
|
---|
| 72 |
|
---|
[33202] | 73 | if header_cell_value and header_cell_value.strip():
|
---|
| 74 | header_names_mapping[header_cell_value] = col_j
|
---|
[33188] | 75 |
|
---|
| 76 | if not found_header:
|
---|
| 77 | header_names_mapping = None
|
---|
| 78 |
|
---|
| 79 | return header_names_mapping
|
---|
| 80 |
|
---|
| 81 |
|
---|
| 82 | # The following can be used on an XSL file, but not currently an XSLX file
|
---|
| 83 | # This is because, at the time of coding, xlrd does not support loading
|
---|
| 84 | # in an XSLT workbook with formatting_info=True
|
---|
| 85 |
|
---|
| 86 | # https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/
|
---|
| 87 | def format_excelval(book, type, value, wanttupledate):
|
---|
| 88 | """ Clean up the incoming excel data """
|
---|
| 89 | ## Data Type Codes:
|
---|
| 90 | ## EMPTY 0
|
---|
| 91 | ## TEXT 1 a Unicode string
|
---|
| 92 | ## NUMBER 2 float
|
---|
| 93 | ## DATE 3 float
|
---|
| 94 | ## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
|
---|
| 95 | ## ERROR 5
|
---|
| 96 | returnrow = []
|
---|
| 97 | if type == 2: # TEXT
|
---|
| 98 | if value == int(value): value = int(value)
|
---|
| 99 | elif type == 3: # NUMBER
|
---|
| 100 | datetuple = xlrd.xldate_as_tuple(value, book.datemode)
|
---|
| 101 | value = datetuple if wanttupledate else tupledate_to_isodate(datetuple)
|
---|
| 102 | elif type == 5: # ERROR
|
---|
| 103 | value = xlrd.error_text_from_code[value]
|
---|
| 104 | return value
|
---|
| 105 |
|
---|