- Timestamp: 2019-02-26T11:35:55+13:00 (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/xls-to-csv.py
  r32813 r32820
    49    49   unbound_wr = csv.writer(csv_unbound_ofile, quoting=csv.QUOTE_ALL)
    50    50
-   51         # 1. Skip lines until "Report Identifier" encountered
-   52         # 2. Remove "Report Identifier" line entries to turn
-   53         # into Greenstone friendly metadata names
+         51   # 1. Skip lines until Header encountered (as defined by encountering "Report Identifier")
+         52   # 2. Write out Header line as Greenstone friendly Metadata terms
+         53   # => remove sp spaces, change "Report Identifier" to hnz.Identifier
    54    54   # 3. Process the rest of the file, checking if accompanying
    55    55   # PDF file present or not
     …     …
    57    57   num_rows = worksheet.nrows
    58    58
+         59   # 1, Skip lines until Header encountered
    59    60   row_i = 0
    60    61   found_header = False
     …     …
    67    68   row_i = row_i + 1
    68    69
+         70   # 2. Process Header into Greenstone friendly metadata terms
    69    71   if found_header:
    70    72   entry_utf8_row = []
     …     …
    87    89   while row_i<num_rows:
    88    90   num_cols = worksheet.row_len(row_i)
-   89         entry_utf8_row = []
+         91   formatted_utf8_row = []
    90    92
    91    93   found_pdf = False
    92    94
-   93         # for entry in worksheet.row_values(row_i):
    94    95   for col_j in range(num_cols):
    95    96   cell = worksheet.cell(row_i,col_j)
-   96         # xf = workbook.xf_list[cell.xf_index]
-   97         # format = workbook.format_map[xf.format_key]
-   98         # format_str = format.format_str
-   99
-  100         # print 'rowx=%d colx=%d ctype=%d xfx=%d s_value=%s fmt=%s' \
-  101         # % (row_i, col_j, cell.ctype, cell.xf_index, str(cell.value), format_str)
   102    97
   103    98   cell_type = worksheet.cell_type(row_i,col_j)
   104    99   cell_value = worksheet.cell_value(row_i,col_j)
   105   100
-  106         format_cell = format_excelval(workbook,cell_type,cell_value,False)
-  107         #print "**** format cell str = " +str(format_cell)
-  108
-  109         # entry = worksheet.cell_value(row_i,col_j)
-  110         entry = format_cell
+        101   formatted_cell = format_excelval(workbook,cell_type,cell_value,False)
   111   102
   112   103   if col_j == 0:
   113   104   # Check to see if companion PDF file present
-  114         pdf_filename = os.path.join(excel_dirname,"pdfs", entry+".pdf")
+        105   pdf_filename = os.path.join(excel_dirname,"pdfs",formatted_cell+".pdf")
   115   106   if os.path.exists(pdf_filename):
   116   107   found_pdf = True
-  117         pdf_file = os.path.join("pdfs", entry+".pdf")
-  118         entry_utf8_row.insert(0, pdf_file)
+        108   pdf_file = os.path.join("pdfs",formatted_cell+".pdf")
+        109   formatted_utf8_row.insert(0, pdf_file)
   119   110
-  120         entry_utf8 = unicode(entry).encode("utf-8")
-  121         entry_utf8_row.append(entry_utf8)
+        111   formatted_cell_utf8 = unicode(formatted_cell).encode("utf-8")
+        112   formatted_utf8_row.append(formatted_cell_utf8)
   122   113   if found_pdf:
-  123         pdfbound_wr.writerow( entry_utf8_row)
+        114   pdfbound_wr.writerow(formatted_utf8_row)
   124   115   else:
-  125         unbound_wr.writerow( entry_utf8_row)
+        116   unbound_wr.writerow(formatted_utf8_row)
   126   117
   127   118   row_i = row_i + 1
Note: See TracChangeset for help on using the changeset viewer.