Changeset 32808

Show
Ignore:
Timestamp:
24.02.2019 18:12:33 (8 months ago)
Author:
davidb
Message:

Added splitting of rows between the UNBOUND and PDF-BOUND CSV output files, based on whether a companion PDF exists.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/prepare/xls-to-csv.py

    r32807 r32808  
    1717    pdfbound_filename = os.path.join(excel_dirname,"PDF-BOUND "+excel_file_root+".csv") 
    1818    unbound_filename  = os.path.join(excel_dirname,"UNBOUND "+excel_file_root+".csv") 
    19  
    20     pdf_dirname = os.path.join(excel_dirname,"pdfs") 
    2119 
    2220    # print "Worksheet: " + worksheet_name 
     
    3937    found_header = False 
    4038    while row_i<num_rows: 
    41         first_cell = worksheet.row_values(row_i)[0]; 
     39        first_cell = worksheet.cell_value(row_i,0); 
    4240        if first_cell == "Report Identifier": 
    4341            found_header = True 
     
    5553        unbound_wr.writerow(entry_utf8_row) 
    5654 
     55        # Add in 'Filename' as first column in pdfbound CSV file 
     56        entry_utf8_row.insert(0, "Filename") 
     57        pdfbound_wr.writerow(entry_utf8_row) 
     58 
     59 
    5760        row_i = row_i + 1 
    5861        while row_i<num_rows: 
     62            num_cols = worksheet.row_len(row_i) 
    5963            entry_utf8_row = [] 
    60             for entry in worksheet.row_values(row_i): 
     64 
     65            found_pdf = False 
     66 
     67#            for entry in worksheet.row_values(row_i): 
     68            for col_j in range(num_cols): 
     69                entry = worksheet.cell_value(row_i,col_j) 
     70 
     71                if col_j == 0: 
     72                    # Check to see if companion PDF file present 
     73                    pdf_filename = os.path.join(excel_dirname,"pdfs",entry+".pdf") 
     74                    if os.path.exists(pdf_filename): 
     75                        found_pdf = True 
     76                        pdf_file = os.path.join("pdfs",entry+".pdf") 
     77                        entry_utf8_row.insert(0, pdf_file) 
     78                                  
    6179                entry_utf8 = unicode(entry).encode("utf-8") 
    6280                entry_utf8_row.append(entry_utf8) 
    63             unbound_wr.writerow(entry_utf8_row) 
     81            if found_pdf: 
     82                pdfbound_wr.writerow(entry_utf8_row) 
     83            else: 
     84                unbound_wr.writerow(entry_utf8_row) 
    6485 
    6586            row_i = row_i + 1