root/main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-sanitized-csv.py @ 33224

Revision 33224, 2.0 KB (checked in by davidb, 14 months ago)

Further code tidy-up

Line 
1import xlrd
2import csv
3import os
4import sys
5
6import xlsutil
7
8def excel_to_sanitized_csv(excel_filename,sanitized_csv_filename):
9    workbook = xlrd.open_workbook(excel_filename)
10
11    worksheet = workbook.sheet_by_index(xlsutil.worksheet_index_pos)
12    ## worksheet = workbook.sheet_by_name(xlsutil.worksheet_name)
13
14    sanitized_csv_ofile = open(sanitized_csv_filename, 'wb')
15
16    sanitized_csv_wr  = csv.writer(sanitized_csv_ofile, quoting=csv.QUOTE_ALL)
17
18    # 1. For header line, Build up hashmap of header-names to column number
19    # 2. Write out CSV file for only the header-names in 'sanitized_headers'
20
21    num_rows = worksheet.nrows
22
23    # 1. get header-map
24    header_names_mapping = xlsutil.getHeaderMappingToColNum(worksheet)
25
26    if header_names_mapping is None:
27        print "Failed to find \"Report Identifier\" header in spreadsheet (Row 0 or Sheet 0)"
28        exit(1)
29
30    # 2. Write out CSV file ...
31    # Work through all the row to the spreadsheet, including the header-names
32    row_i = xlsutil.header_row_pos
33   
34    while row_i<num_rows:
35        # Work through sanitized header names building up row of utf8 values
36        sanitized_utf8_row = []
37
38        for header_name in xlsutil.sanitized_headers:
39
40            if header_name in header_names_mapping:
41                header_col_j = header_names_mapping[header_name]
42
43                cell_value_unicode = xlsutil.cell_value_tidy_unicode(worksheet,row_i,header_col_j)
44                cell_value_utf8 = cell_value_unicode.encode("utf-8")
45
46                sanitized_utf8_row.append(cell_value_utf8)
47            else:
48                print("Warning: Failed to column mapping in spreadsheet for header name \""+header_name+"\" => skipping")
49               
50        sanitized_csv_wr.writerow(sanitized_utf8_row)
51
52        row_i = row_i + 1
53
54    sanitized_csv_ofile.close()
55
56if __name__ == "__main__":
57    if len(os.sys.argv) != 3:
58        print >> sys.stderr, "Usage: " + os.sys.argv[0] + "input.xsl|input.xslx output.csv"
59        exit(1)
60
61    excel_to_sanitized_csv(os.sys.argv[1],os.sys.argv[2])
62
Note: See TracBrowser for help on using the browser.