Ignore:
Timestamp:
2019-02-26T11:35:55+13:00 (3 years ago)
Author:
davidb
Message:

Tidy up

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/xls-to-csv.py

    r32813 r32820  
    4949    unbound_wr  = csv.writer(csv_unbound_ofile, quoting=csv.QUOTE_ALL)
    5050
    51     # 1. Skip lines until "Report Identifier" encountered
    52     # 2. Remove "Report Identifier" line entries to turn
    53     #     into Greenstone friendly metadata names
     51    # 1. Skip lines until Header encountered (as defined by encountering "Report Identifier")
     52    # 2. Write out Header line as Greenstone friendly Metadata terms
     53    #      => remove spaces, change "Report Identifier" to hnz.Identifier
    5454    # 3. Process the rest of the file, checking if accompanying
    5555    #     PDF file present or not
     
    5757    num_rows = worksheet.nrows
    5858
     59    # 1. Skip lines until Header encountered
    5960    row_i = 0
    6061    found_header = False
     
    6768        row_i = row_i + 1
    6869
     70    # 2. Process Header into Greenstone friendly metadata terms     
    6971    if found_header:
    7072        entry_utf8_row = []
     
    8789        while row_i<num_rows:
    8890            num_cols = worksheet.row_len(row_i)
    89             entry_utf8_row = []
     91            formatted_utf8_row = []
    9092
    9193            found_pdf = False
    9294
    93 #            for entry in worksheet.row_values(row_i):
    9495            for col_j in range(num_cols):
    9596                cell = worksheet.cell(row_i,col_j)
    96 #                xf = workbook.xf_list[cell.xf_index]
    97 #                format = workbook.format_map[xf.format_key]
    98 #                format_str = format.format_str
    99 
    100 #                print 'rowx=%d colx=%d ctype=%d xfx=%d s_value=%s fmt=%s' \
    101 #                    % (row_i, col_j, cell.ctype, cell.xf_index, str(cell.value), format_str)
    10297
    10398                cell_type  = worksheet.cell_type(row_i,col_j)
    10499                cell_value = worksheet.cell_value(row_i,col_j)
    105100
    106                 format_cell = format_excelval(workbook,cell_type,cell_value,False)
    107                 #print "**** format cell str = " +str(format_cell)
    108 
    109                 # entry = worksheet.cell_value(row_i,col_j)
    110                 entry = format_cell
     101                formatted_cell = format_excelval(workbook,cell_type,cell_value,False)
    111102
    112103                if col_j == 0:
    113104                    # Check to see if companion PDF file present
    114                     pdf_filename = os.path.join(excel_dirname,"pdfs",entry+".pdf")
     105                    pdf_filename = os.path.join(excel_dirname,"pdfs",formatted_cell+".pdf")
    115106                    if os.path.exists(pdf_filename):
    116107                        found_pdf = True
    117                         pdf_file = os.path.join("pdfs",entry+".pdf")
    118                         entry_utf8_row.insert(0, pdf_file)
     108                        pdf_file = os.path.join("pdfs",formatted_cell+".pdf")
     109                        formatted_utf8_row.insert(0, pdf_file)
    119110                                 
    120                 entry_utf8 = unicode(entry).encode("utf-8")
    121                 entry_utf8_row.append(entry_utf8)
     111                formatted_cell_utf8 = unicode(formatted_cell).encode("utf-8")
     112                formatted_utf8_row.append(formatted_cell_utf8)
    122113            if found_pdf:
    123                 pdfbound_wr.writerow(entry_utf8_row)
     114                pdfbound_wr.writerow(formatted_utf8_row)
    124115            else:
    125                 unbound_wr.writerow(entry_utf8_row)
     116                unbound_wr.writerow(formatted_utf8_row)
    126117
    127118            row_i = row_i + 1
Note: See TracChangeset for help on using the changeset viewer.