Changeset 34861
- Timestamp:
- 2021-02-15T16:37:20+13:00 (3 years ago)
- Location:
- main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-to-jsonmetadata.py
r34860 r34861 6 6 import argparse 7 7 8 # import xlrd9 8 import openpyxl 10 9 11 #import csv 12 13 #import xlsutil 14 15 #filter_headers = [ 16 # u"Heading 1", u"Heading 2" 17 # # ... and so on 18 #] 19 20 filter_headers = None 21 22 def load_xslx_sheet(excel_input_filename,sheetname): 23 print "Loading: " + excel_input_filename 24 print " ..." 25 workbook = openpyxl.load_workbook(excel_input_filename, data_only=True) 26 print " ......Done" 27 28 print " Loaded worksheets: ", workbook.sheetnames 29 30 worksheet = None 31 32 if (sheetname == None): 33 print " Selecting default/active worksheet" 34 worksheet = workbook.active 35 else: 36 print " Selecting worksheet '"+sheetname+"'" 37 worksheet = workbook[sheetname] 38 39 print "" 40 return worksheet 41 42 43 def header_to_column_mapping(worksheet,header_row_pos): 44 45 # For header line, build up hashmap of header-names to column number 46 47 header_names_mapping = {} 48 49 min_col = worksheet.min_column 50 max_col = worksheet.max_column 51 52 found_header = False 53 for col_j in range(min_col,max_col+1): 54 header_cell = worksheet.cell(row=header_row_pos, column=col_j) 55 header_cell_value = header_cell.value.strip() 56 57 if header_cell_value: 58 header_names_mapping[header_cell_value] = col_j 59 60 return header_names_mapping 61 62 63 def convert_worksheet_to_hashmaps(worksheet): 64 65 print "Converting worksheet to array header-name keyed array of hashmaps" 66 67 data_hashmap_array = []; 68 69 # scan through rows until non-empty/headernames encountered 70 71 header_row_pos = None; 72 73 row_i = 1 74 for row in worksheet.values: 75 for value in row: 76 if (value != None) : 77 header_row_pos = row_i 78 break 79 80 print(value) 81 if (header_row_pos != None): break 82 row_i = row_i + 1 83 84 print " Found headername data in row: ", header_row_pos 85 86 header_names_mapping = header_to_column_mapping(worksheet,header_row_pos) 87 header_names = header_names_mapping.keys() 88 89 # Move on to next row, to start processing the data values 90 row_i = row_i + 1 91 num_rows = worksheet.max_row; 92 93 while row_i<=num_rows: 94 # Work through the provided header names, building up hashmap of header-name to value 95 data_hashmap = {} 96 97 for header_name in header_names: 98 99 header_col_j = header_names_mapping[header_name] 100 101 data_cell = worksheet.cell(row=row_i,column=header_col_j) 102 data_value = data_cell.value 103 104 data_hashmap[header_name] = data_value 105 106 row_i = row_i + 1 107 108 data_hashmap_array.append(data_hashmap) 109 110 print "" 111 return data_hashmap_array 10 import xlsxutil 11 112 12 113 13 def fileset_voting_for_esc_country_in_year(data_hashmap): … … 247 147 json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json' 248 148 249 worksheet = load_xslx_sheet(excel_input_filename,sheetname)250 251 data_hashmap_array = convert_worksheet_to_hashmaps(worksheet)149 worksheet = xlsxutil.load_xslx_sheet(excel_input_filename,sheetname) 150 151 data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet) 252 152 253 153 print "Number of data rows: ", len(data_hashmap_array) 254 154 155 255 156 to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array) 256 157 … … 286 187 ### TODO 287 188 288 ## fix up names to match those used in DBpedia ESC289 189 ## provide metadata value that lists the countries-year-type to voted 290 190 291 # F.Y.R.Macedonia2012292 # Serbia&Montenegro2004.id : 35 votes293 # Bosnia&Herzegovina2005.id : 38 votes294 # TheNetherlands295 296 297 # From ESC:298 # Republic of Macedonia (2005)299 # Netherlands (not The Netherlands)300 # Federal Republic of Yugoslavia (1992)301 302 303 191 print "Creating Greenstone JSON voting metadata for:" 304 192 for to_country_year_votes in to_country_year_voting_groups:
Note:
See TracChangeset
for help on using the changeset viewer.