Ignore:
Timestamp:
2019-06-22T10:33:10+12:00 (5 years ago)
Author:
davidb
Message:

Code tidy-up

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py

    r33188 r33191  
    44
    55import xlsutil
    6 
    7 ## worksheet_name = "Archaeological reports"
    86
    97letter_to_folder = {
     
    4846
    4947
     48# **** Written, but never used
     49# **** Col numbers refer to older version of spreadsheet
     50#
     51# There is now a mapping of header-name to index position which would be
     52# a better way to do things
     53
    5054def fixup_cell(col_num,cell_str):
    5155    # col == 1 => Author
     
    7478
    7579def excel_to_bound_pdf_csv(excel_filename):
    76     #workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
    7780    workbook = xlrd.open_workbook(excel_filename)
    78     ## worksheet = workbook.sheet_by_name(worksheet_name)
    79     worksheet = workbook.sheet_by_index(0)
     81
     82    worksheet = workbook.sheet_by_index(xlsutil.worksheet_index_pos)
     83    ## worksheet = workbook.sheet_by_name(xlsutil.worksheet_name)
    8084
    8185    excel_filename_split = os.path.split(excel_filename)
     
    8892    unbound_filename  = os.path.join(excel_dirname,"UNBOUND "+excel_file_root+".csv")
    8993
    90     # print "Worksheet: " + worksheet_name
    9194    csv_pdfbound_ofile = open(pdfbound_filename, 'wb')
    92     #csv_unbound_ofile  = open('{}.csv'.format(excel_file_root), 'wb')
    9395    csv_unbound_ofile  = open(unbound_filename, 'wb')
    9496
    9597    pdfbound_wr = csv.writer(csv_pdfbound_ofile, quoting=csv.QUOTE_ALL)
    9698    unbound_wr  = csv.writer(csv_unbound_ofile, quoting=csv.QUOTE_ALL)
    97 
    98 #    # 1. Skip lines until Header encountered (as defined by encountering "Report Identifier")
    99 #    # 2. Write out Header line as Greenstone friendly Metadata terms
    100 #    #      => remove sp spaces, change "Report Identifier" to hnz.Identifier
    101 #    # 3. Process the rest of the file, checking if accompanying
    102 #    #     PDF file present or not
    103 
    10499
    105100    # 1. For header-line, build up hashmap of header-names to column number
     
    110105    #     specified in 'sanitized_headers'
    111106
    112 
    113107    num_rows = worksheet.nrows
    114108
    115     # 1. Skip lines until Header encountered
    116 #    row_i = 0
    117 #    found_header = False
    118 #    while row_i<num_rows:
    119 #        first_cell = worksheet.cell_value(row_i,0);
    120 #        if first_cell == "Report Identifier":
    121 #            found_header = True
    122 #            break
    123 #        print "Skipping row {} as not yet encountered 'Report Identifier' metadata label in column 0".format(row_i)
    124 #        row_i = row_i + 1
    125 
     109    # 1. Get header-line hashmap of header-names to column numbers
    126110    header_names_mapping = xlsutil.getHeaderMappingToColNum(worksheet)
    127111
    128     # 2. Process Header into Greenstone friendly metadata terms     
    129112    if header_names_mapping is None:
    130113        print "Failed to find metadata label 'Report Identifier' in column 0"
    131114        exit()
    132115    else:
     116        # 2. Process Header into Greenstone friendly metadata terms     
     117
    133118        entry_utf8_row = []
    134119        row_i = 0;
     
    164149        pdfbound_wr.writerow(entry_utf8_row)
    165150                     
    166         # 3. Process the rest of the file ...
     151        # 3. Process the rest of the file (metadata values) ...
    167152
    168153        row_i = row_i + 1
     
    179164                    col_j = header_names_mapping[header_name]
    180165
    181 #                    cell = worksheet.cell(row_i,col_j)
    182 
    183 #                    cell_type  = worksheet.cell_type(row_i,col_j)
    184166                    cell_value = worksheet.cell_value(row_i,col_j)
    185167
    186 #                    formatted_cell_value = format_excelval(workbook,cell_type,cell_value,False)
    187168                    formatted_cell_value = xlsutil.format_if_int(cell_value)
    188169
    189 #                    if col_j == 0:
    190170                    if header_name == "Report Identifier":
    191171                        # Check to see if companion PDF file present
Note: See TracChangeset for help on using the changeset viewer.