Show
Ignore:
Timestamp:
22.06.2019 17:13:38 (4 months ago)
Author:
davidb
Message:

Bound and Unbound CSV files changed to print out all non-empty header columns

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py

    r33191 r33202  
    103103    # 3. Process the rest of the file, checking if accompanying  
    104104    #     PDF file present or not and only printing out the header-names 
    105     #     specified in 'sanitized_headers' 
     105    #     as long as it has a non-empty header-name 
     106##    #     specified in 'sanitized_headers' 
    106107 
    107108    num_rows = worksheet.nrows 
     
    117118 
    118119        entry_utf8_row = [] 
    119         row_i = 0; 
    120  
    121         for header_name in xlsutil.sanitized_headers: 
    122  
    123             if header_name in header_names_mapping: 
     120        num_header_cols = worksheet.row_len(xlsutil.header_row_pos) 
     121 
     122        for col_j in range(num_header_cols): 
     123##        for header_name in xlsutil.sanitized_headers: 
     124 
     125            header_name = worksheet.cell_value(xlsutil.header_row_pos,col_j) 
     126 
     127            if header_name and header_name.strip(): 
     128####                print "*** 2. header_name = " + header_name 
     129 
    124130                header_col_j = header_names_mapping[header_name] 
    125  
    126                 header_cell_value = worksheet.cell_value(row_i,header_col_j) 
     131##            if header_name in header_names_mapping: 
     132##                header_col_j = header_names_mapping[header_name] 
     133 
     134##                header_cell_value = worksheet.cell_value(row_i,header_col_j) 
    127135                 
    128                 if header_cell_value == "Report Identifier": 
    129                     header_cell_value = "hnz.Identifier" 
    130  
    131                 if header_cell_value == "Title": 
    132                     header_cell_value = "dc.Title" 
    133  
    134                 if header_cell_value == "RelevantTLA's": 
    135                     header_cell_value = "TLA" 
    136  
    137                 header_cell_value_utf8 = unicode(header_cell_value).encode("utf-8") 
    138                 metadata_name_utf8 = header_cell_value_utf8.replace(" ", "") 
     136                if header_name == "Report Identifier": 
     137                    header_name = "hnz.Identifier" 
     138 
     139                if header_name == "Title": 
     140                    header_name = "dc.Title" 
     141 
     142                if header_name == "RelevantTLA's": 
     143                    header_name = "TLA" 
     144 
     145                header_name_utf8 = unicode(header_name).encode("utf-8") 
     146                metadata_name_utf8 = header_name_utf8.replace(" ", "") 
    139147 
    140148                entry_utf8_row.append(metadata_name_utf8) 
    141149 
    142             else: 
    143                 print("Warning: Failed to column mapping in spreadsheet for header name \""+header_name+"\" => skipping") 
     150##            else: 
     151##                print("Warning: Failed to column mapping in spreadsheet for header name \""+header_name+"\" => skipping") 
    144152 
    145153        unbound_wr.writerow(entry_utf8_row) 
     
    151159        # 3. Process the rest of the file (metadata values) ... 
    152160 
    153         row_i = row_i + 1 
     161        row_i = xlsutil.header_row_pos + 1 
    154162        while row_i<num_rows: 
    155163            num_cols = worksheet.row_len(row_i) 
     
    158166            found_pdf = False 
    159167 
    160 #            for col_j in range(num_cols): 
    161             for header_name in xlsutil.sanitized_headers: 
    162  
    163                 if header_name in header_names_mapping: 
     168            for col_j in range(num_cols): 
     169##            for header_name in xlsutil.sanitized_headers: 
     170                header_name = worksheet.cell_value(xlsutil.header_row_pos,col_j) 
     171 
     172                if header_name and header_name.strip(): 
    164173                    col_j = header_names_mapping[header_name] 
     174##                if header_name in header_names_mapping: 
     175##                    col_j = header_names_mapping[header_name] 
    165176 
    166177                    cell_value = worksheet.cell_value(row_i,col_j) 
     
    201212 
    202213                    formatted_utf8_row.append(formatted_cell_value_utf8) 
    203                 else: 
    204                     print "Warning: No column number mapping for header name \""+header_name+"\" => skipping" 
     214##                else: 
     215##                    print "Warning: No column number mapping for header name \""+header_name+"\" => skipping" 
    205216 
    206217            if found_pdf: