# Excel -> "sanitized" CSV converter script (Python 2 syntax: it uses the
# `print "..."` and `print >> sys.stderr, ...` statement forms).
#
# NOTE(review): this whole script is stored on ONE physical line, so it is not
# parseable as-is; statement boundaries and indentation have been lost, and
# everything after the first `##` is lexically a comment.  In addition, the
# text between `while row_i` and `skipping")` is missing: the loop condition,
# the code that builds `sanitized_utf8_row`, and the start of a call whose
# argument ends in `skipping"` are not present.  Recover that span from
# version control before reformatting -- do not reconstruct it by guesswork.
#
# What the visible text shows:
#
# excel_to_sanitized_csv(excel_filename, sanitized_csv_filename)
#   * Opens `excel_filename` with xlrd.open_workbook() and selects the sheet
#     at index `xlsutil.worksheet_index_pos`.
#   * Opens `sanitized_csv_filename` in 'wb' mode and wraps it in a csv.writer
#     configured with quoting=csv.QUOTE_ALL.
#   * Reads `worksheet.nrows` into `num_rows`.
#   * Calls xlsutil.getHeaderMappingToColNum(worksheet); when that returns
#     None it prints a "Failed to find ..." message and calls exit(1).
#   * Starts `row_i` at `xlsutil.header_row_pos`; `writerow(sanitized_utf8_row)`
#     and `row_i = row_i + 1` are presumably the tail of the per-row loop body
#     (TODO confirm once the missing span is restored).
#   * Closes the output file with an explicit close() call.
#
# Script entry point (`__main__` guard)
#   * Requires exactly two command-line arguments (len(argv) == 3); otherwise
#     prints a usage line to stderr and calls exit(1).
#   * Calls excel_to_sanitized_csv(argv[1], argv[2]).
#
# NOTE(review): the output file is closed by a plain close() with no
#   try/finally or `with`, so it looks like an exception raised in between
#   would leave it open -- confirm against the restored code.
# NOTE(review): the header-missing message is printed to stdout, whereas the
#   usage message goes to stderr -- confirm whether that is intentional.
# NOTE(review): the usage string concatenates argv[0] directly with
#   "input.xsl|input.xslx output.csv" (no separating space), and the
#   extensions presumably should read "xls"/"xlsx" -- verify before changing.
# NOTE(review): argv is reached through `os.sys.argv` although `sys` is
#   imported directly, and the bare `exit()` builtin is used rather than
#   `sys.exit()`.
import xlrd import csv import os import sys import xlsutil def excel_to_sanitized_csv(excel_filename,sanitized_csv_filename): workbook = xlrd.open_workbook(excel_filename) worksheet = workbook.sheet_by_index(xlsutil.worksheet_index_pos) ## worksheet = workbook.sheet_by_name(xlsutil.worksheet_name) sanitized_csv_ofile = open(sanitized_csv_filename, 'wb') sanitized_csv_wr = csv.writer(sanitized_csv_ofile, quoting=csv.QUOTE_ALL) # 1. For header line, Build up hashmap of header-names to column number # 2. Write out CSV file for only the header-names in 'sanitized_headers' num_rows = worksheet.nrows # 1. get header-map header_names_mapping = xlsutil.getHeaderMappingToColNum(worksheet) if header_names_mapping is None: print "Failed to find \"Report Identifier\" header in spreadsheet (Row 0 or Sheet 0)" exit(1) # 2. Write out CSV file ... # Work through all the row to the spreadsheet, including the header-names row_i = xlsutil.header_row_pos while row_i skipping") sanitized_csv_wr.writerow(sanitized_utf8_row) row_i = row_i + 1 sanitized_csv_ofile.close() if __name__ == "__main__": if len(os.sys.argv) != 3: print >> sys.stderr, "Usage: " + os.sys.argv[0] + "input.xsl|input.xslx output.csv" exit(1) excel_to_sanitized_csv(os.sys.argv[1],os.sys.argv[2])