source: main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-sanitized-csv.py@ 33191

Last change on this file since 33191 was 33191, checked in by davidb, 5 years ago

Code tidy-up

File size: 2.1 KB
Line 
1import xlrd
2import csv
3import os
4
5import xlsutil
6
7def excel_to_sanitized_csv(excel_filename):
8 workbook = xlrd.open_workbook(excel_filename)
9
10 worksheet = workbook.sheet_by_index(xlsutil.worksheet_index_pos)
11 ## worksheet = workbook.sheet_by_name(xlsutil.worksheet_name)
12
13 excel_filename_split = os.path.split(excel_filename)
14 excel_dirname = excel_filename_split[0]
15 excel_file = excel_filename_split[1]
16 excel_file_splitext = os.path.splitext(excel_file)
17 excel_file_root = excel_file_splitext[0];
18
19 sanitized_csv_filename = os.path.join(excel_dirname,excel_file_root+".csv")
20
21 sanitized_csv_ofile = open(sanitized_csv_filename, 'wb')
22
23 sanitized_csv_wr = csv.writer(sanitized_csv_ofile, quoting=csv.QUOTE_ALL)
24
25 # 1. For header line, Build up hashmap of header-names to column number
26 # 2. Write out CSV file for only the header-names in 'sanitized_headers'
27
28 num_rows = worksheet.nrows
29
30 # 1. get header-map
31 header_names_mapping = xlsutil.getHeaderMappingToColNum(worksheet)
32
33 if header_names_mapping is None:
34 print "Failed to find \"Report Identifier\" header in spreadsheet (Row 0 or Sheet 0)"
35 exit(1)
36
37 # Move on to the start of the data values
38 row_i = 1
39
40 # 2. Write out CSV file ...
41 while row_i<num_rows:
42 # Work through sanitized header names building up row of utf8 values
43 sanitized_utf8_row = []
44
45 for header_name in xlsutil.sanitized_headers:
46
47 if header_name in header_names_mapping:
48 header_col_j = header_names_mapping[header_name]
49
50 cell_value = worksheet.cell_value(row_i,header_col_j)
51
52 cell_value_utf8 = unicode(cell_value).encode("utf-8")
53
54 sanitized_utf8_row.append(cell_value_utf8)
55 else:
56 print("Warning: Failed to column mapping in spreadsheet for header name \""+header_name+"\" => skipping")
57
58 sanitized_csv_wr.writerow(sanitized_utf8_row)
59
60 row_i = row_i + 1
61
62 sanitized_csv_ofile.close()
63
64if __name__ == "__main__":
65 excel_to_sanitized_csv(os.sys.argv[1])
66
Note: See TracBrowser for help on using the repository browser.