Context Navigation

← Previous Changeset
Next Changeset →

Changeset 33191

Timestamp:

2019-06-22T10:33:10+12:00 (5 years ago)

Author:

davidb

Message:

Code tidy-up

Location:

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare

Files:

: 3 edited

xls-to-csv.py (modified) (7 diffs)
xls-to-sanitized-csv.py (modified) (4 diffs)
xlsutil.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py

-              r33188
+              r33191
 import xlsutil
-## worksheet_name = "Archaeological reports"
 letter_to_folder = {
 …
+# **** Written, but never used
+# **** Col numbers refer to older version of spreadsheet
+#
+# There is now a mapping of header-name to index position which would be
+# a better way to do this
 def fixup_cell(col_num,cell_str):
     # col == 1 => Author
 …
 def excel_to_bound_pdf_csv(excel_filename):
-    #workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
     workbook = xlrd.open_workbook(excel_filename)
+    ## worksheet = workbook.sheet_by_name(worksheet_name)
+    worksheet = workbook.sheet_by_index(0)
+    worksheet = workbook.sheet_by_index(xlsutil.worksheet_index_pos)
+    ## worksheet = workbook.sheet_by_name(xlsutil.worksheet_name)
     excel_filename_split = os.path.split(excel_filename)
 …
     unbound_filename  = os.path.join(excel_dirname,"UNBOUND "+excel_file_root+".csv")
-    # print "Worksheet: " + worksheet_name
     csv_pdfbound_ofile = open(pdfbound_filename, 'wb')
-    #csv_unbound_ofile  = open('{}.csv'.format(excel_file_root), 'wb')
     csv_unbound_ofile  = open(unbound_filename, 'wb')
     pdfbound_wr = csv.writer(csv_pdfbound_ofile, quoting=csv.QUOTE_ALL)
     unbound_wr  = csv.writer(csv_unbound_ofile, quoting=csv.QUOTE_ALL)
-#    # 1. Skip lines until Header encountered (as defined by encountering "Report Identifier")
-#    # 2. Write out Header line as Greenstone friendly Metadata terms
-#    #      => remove sp spaces, change "Report Identifier" to hnz.Identifier
-#    # 3. Process the rest of the file, checking if accompanying
-#    #     PDF file present or not
     # 1. For header-line, build up hashmap of header-names to column number
 …
     #     specified in 'sanitized_headers'
     num_rows = worksheet.nrows
+    # 1. Skip lines until Header encountered
+#    row_i = 0
+#    found_header = False
+#    while row_i<num_rows:
+#        first_cell = worksheet.cell_value(row_i,0);
+#        if first_cell == "Report Identifier":
+#            found_header = True
+#            break
+#        print "Skipping row {} as not yet encountered 'Report Identifier' metadata label in column 0".format(row_i)
+#        row_i = row_i + 1
+    # 1. Get header-line hashmap of header-names to column numbers
     header_names_mapping = xlsutil.getHeaderMappingToColNum(worksheet)
-    # 2. Process Header into Greenstone friendly metadata terms
     if header_names_mapping is None:
         print "Failed to find metadata label 'Report Identifier' in column 0"
         exit()
     else:
+        # 2. Process Header into Greenstone friendly metadata terms
         entry_utf8_row = []
         row_i = 0;
 …
         pdfbound_wr.writerow(entry_utf8_row)
         # 3. Process the rest of the file ...
+        # 3. Process the rest of the file (metadata values) ...
         row_i = row_i + 1
 …
                     col_j = header_names_mapping[header_name]
-#                    cell = worksheet.cell(row_i,col_j)
-#                    cell_type  = worksheet.cell_type(row_i,col_j)
                     cell_value = worksheet.cell_value(row_i,col_j)
-#                    formatted_cell_value = format_excelval(workbook,cell_type,cell_value,False)
                     formatted_cell_value = xlsutil.format_if_int(cell_value)
-#                    if col_j == 0:
                     if header_name == "Report Identifier":
                         # Check to see if companion PDF file present

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-sanitized-csv.py

-              r33188
+              r33191
 import xlsutil
-## worksheet_name = "Archaeological reports"
 def excel_to_sanitized_csv(excel_filename):
     workbook = xlrd.open_workbook(excel_filename)
+    # worksheet = workbook.sheet_by_name(worksheet_name)
+    worksheet = workbook.sheet_by_index(0)
+    worksheet = workbook.sheet_by_index(xlsutil.worksheet_index_pos)
+    ## worksheet = workbook.sheet_by_name(xlsutil.worksheet_name)
     excel_filename_split = os.path.split(excel_filename)
 …
     sanitized_csv_filename  = os.path.join(excel_dirname,excel_file_root+".csv")
-    # print "Worksheet: " + worksheet_name
     sanitized_csv_ofile = open(sanitized_csv_filename, 'wb')
 …
     num_rows = worksheet.nrows
-#    row_i = 0
-#    num_header_cols = worksheet.row_len(row_i)
     # 1. get header-map
     header_names_mapping = xlsutil.getHeaderMappingToColNum(worksheet)
-#    found_header = False
-#    for col_j in range(num_header_cols):
-#        header_cell_value = worksheet.cell_value(row_i,col_j)
-#        if header_cell_value == "Report Identifier":
-#            found_header = True
+#
-#        header_names_mapping[header_cell_value] = col_j
+#
-#    if not(found_header):
     if header_names_mapping is None:
         print "Failed to find \"Report Identifier\" header in spreadsheet (Row 0 or Sheet 0)"
 …
                 cell_value = worksheet.cell_value(row_i,header_col_j)
                 cell_value_utf8 = unicode(cell_value).encode("utf-8")

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py

-              r33188
+              r33191
 import xlrd
+# The worksheet is currently accessed by its index number.
+# The alternative is to access it by worksheet name.
+worksheet_index_pos = 0
+## worksheet_name = "Archaeological reports"
 sanitized_headers = [ "Report Identifier", "Author", "Title", "Produced By", "Date", "HNZPT Region", "File No",

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 33191

Legend:

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-csv.py

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xls-to-sanitized-csv.py

main/trunk/model-sites-dev/heritage-nz/collect/pdf-reports/prepare/xlsutil.py

Download in other formats: