import xlrd import csv import os ## worksheet_name = "Archaeological reports" # https://code.activestate.com/recipes/546518-simple-conversion-of-excel-files-into-csv-and-yaml/download/1/ def format_excelval(book, type, value, wanttupledate): """ Clean up the incoming excel data """ ## Data Type Codes: ## EMPTY 0 ## TEXT 1 a Unicode string ## NUMBER 2 float ## DATE 3 float ## BOOLEAN 4 int; 1 means TRUE, 0 means FALSE ## ERROR 5 returnrow = [] if type == 2: # TEXT if value == int(value): value = int(value) elif type == 3: # NUMBER datetuple = xlrd.xldate_as_tuple(value, book.datemode) value = datetuple if wanttupledate else tupledate_to_isodate(datetuple) elif type == 5: # ERROR value = xlrd.error_text_from_code[value] return value def excel_to_bound_pdf_csv(excel_filename): workbook = xlrd.open_workbook(excel_filename, formatting_info=True) ## worksheet = workbook.sheet_by_name(worksheet_name) worksheet = workbook.sheet_by_index(0) excel_filename_split = os.path.split(excel_filename) excel_dirname = excel_filename_split[0] excel_file = excel_filename_split[1] excel_file_splitext = os.path.splitext(excel_file) excel_file_root = excel_file_splitext[0]; pdfbound_filename = os.path.join(excel_dirname,"PDF-BOUND "+excel_file_root+".csv") unbound_filename = os.path.join(excel_dirname,"UNBOUND "+excel_file_root+".csv") # print "Worksheet: " + worksheet_name csv_pdfbound_ofile = open(pdfbound_filename, 'wb') #csv_unbound_ofile = open('{}.csv'.format(excel_file_root), 'wb') csv_unbound_ofile = open(unbound_filename, 'wb') pdfbound_wr = csv.writer(csv_pdfbound_ofile, quoting=csv.QUOTE_ALL) unbound_wr = csv.writer(csv_unbound_ofile, quoting=csv.QUOTE_ALL) # 1. Skip lines until "Report Identifier" encountered # 2. Remove "Report Identifier" line entries to turn # into Greenstone friendly metadata names # 3. Process the rest of the file, checking if accompanying # PDF file present or not num_rows = worksheet.nrows row_i = 0 found_header = False while row_i