Changeset 37649
- Timestamp:
- 2023-04-11T23:52:30+12:00 (13 months ago)
- Location:
- gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/RECONFIGURE.sh
r37633 r37649 1 1 #!/bin/bash 2 2 3 source ../_local_collect_config.bash 4 3 5 wget -O - "${gs_library_url}?a=s&sa=c&sc=$col" >/dev/null -
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/etc/collectionConfig.xml
r37647 r37649 32 32 </xsl:template> 33 33 </format> 34 <search type=" lucene">34 <search type="solr"> 35 35 <level name="document"> 36 36 <displayItem lang="en" name="name">document</displayItem> 37 37 </level> 38 <!-- 38 39 <level name="section"> 39 40 <displayItem lang="en" name="name">section</displayItem> 40 41 </level> 42 --> 41 43 <defaultLevel name="document"/> 42 <index name="text ">43 <displayItem lang="en" name="name">text</displayItem>44 <index name="text,Title,Creator,Description,Notes"> 45 <displayItem lang="en" name="name">all text</displayItem> 44 46 </index> 45 47 <index name="dc.Title,ex.dc.Title,Title"> 46 48 <displayItem lang="en" name="name">titles</displayItem> 47 49 </index> 48 <index name=" Source">49 <displayItem lang="en" name="name">filenames</displayItem>50 <index name="Creator"> 51 <displayItem lang="en" name="name">content creators</displayItem> 50 52 </index> 51 <defaultIndex name="text"/> 53 <index name="Description"> 54 <displayItem lang="en" name="name">descriptions</displayItem> 55 </index> 56 <index name="Notes"> 57 <displayItem lang="en" name="name">notes</displayItem> 58 </index> 59 <defaultIndex name="text,Title,Creator,Description,Notes"/> 60 52 61 <sort name="rank"> 53 62 <displayItem lang="en" name="name">rank</displayItem> 54 63 </sort> 64 <!-- 55 65 <sort name="none"> 56 66 <displayItem lang="en" name="name">natural (build) order</displayItem> 57 67 </sort> 68 --> 69 70 <facet name="InCopyright"> 71 <displayItem lang="en" name="name">In Copyright</displayItem> 72 </facet> 73 <facet name="Theme"> 74 <displayItem lang="en" name="name">Theme</displayItem> 75 </facet> 76 <facet name="ObjectType"> 77 <displayItem lang="en" name="name">Content Type</displayItem> 78 </facet> 79 58 80 <searchType name="plain"/> 59 81 <searchType name="simpleform"/> … … 81 103 <plugin name="ZIPPlugin"/> 82 104 <plugin name="GreenstoneXMLPlugin"/> 105 83 106 <plugin name="CSVPlugin"/> 107 <plugin name="ImagePlugin"> 108 <option name="enable_cache"/> 
109 </plugin> 84 110 <plugin name="TextPlugin"/> 85 111 <plugin name="HTMLPlugin"/> 86 <plugin name="EmailPlugin"/> 112 <!-- <plugin name="EmailPlugin"/> --> 113 87 114 <plugin name="PDFv2Plugin"/> 88 115 <!-- Configuring an UnknownConverterPlugin for docx processing with Tika --> … … 117 144 <plugin name="PowerPointPlugin"/> 118 145 <plugin name="ExcelPlugin"/> 119 <plugin name="ImagePlugin"/>120 146 <plugin name="ISISPlugin"/> 121 147 <plugin name="NulPlugin"/> … … 126 152 </pluginList> 127 153 </import> 154 <importOption name="OIDtype" value="assigned"/> 155 <importOption name="OIDmetadata" value="DLIdentifier"/> 156 128 157 <browse> 129 158 <classifier name="List"> … … 244 273 (e.g. hierarchical and paged documents) as it can prevent any sub-sections from showing. 245 274 --> 246 <!-- 275 276 <gsf:template name="documentPre"> 277 <!-- 278 Title 279 Creator 280 Description 281 CreationDate 282 OirginalOrCopy 283 FileNote 284 LibraryRefNum 285 InCopyright 286 Theme Theme Theme 287 ObjectType ObjectType ObjectType 288 Notes 289 OptHyperlink 290 --> 291 <br/> 292 <gsf:metadata-table> 293 <!-- <gsf:metadata name="Title">Title:</gsf:metadata> --> 294 <gsf:metadata name="Creator">Creator:</gsf:metadata> 295 <gsf:metadata name="Description">Description:</gsf:metadata> 296 <gsf:metadata name="OirginalOrCopy">Original or Copy:</gsf:metadata> 297 <gsf:metadata name="InCopyright">In Copyright:</gsf:metadata> 298 <gsf:metadata name="Theme">Theme(s):</gsf:metadata> 299 <gsf:metadata name="DLIdentifier">Document Id:</gsf:metadata> 300 <gsf:metadata name="Notes">Notes:</gsf:metadata> 301 </gsf:metadata-table> 302 <br/> 303 304 <gsf:switch> 305 <gsf:metadata name="OptHyperlink"/> 306 <gsf:when test="exists"> 307 308 309 <gsf:switch> 310 <gsf:metadata name="Plugin"/> 311 <gsf:when test="equals" test-value="ImagePlugin"> 312 <!-- suppress external link, as screenview with link to original displayed --> 313 </gsf:when> 314 <gsf:otherwise> 315 <div> 316 <a target="_blank"> 317 
<xsl:attribute name="href"><gsf:metadata name="OptHyperlink"/></xsl:attribute> 318 View original content 319 </a> 320 </div> 321 </gsf:otherwise> 322 </gsf:switch> 323 324 </gsf:when> 325 </gsf:switch> 326 327 </gsf:template> 328 329 <!-- 247 330 <gsf:template name="documentContent"> 248 331 <div id="gs-document"> 249 332 <xsl:call-template name="documentPre"/> 250 333 <xsl:call-template name="wrappedSectionImage"/> 251 <div id="gs-document-text"> 252 <xsl:call-template name="documentNodeText"/> 253 </div> 334 335 <gsf:switch> 336 <gsf:metadata name="FileFormat"/> 337 <gsf:when test="equals" test-value="HTML"> 338 </gsf:when> 339 <gsf:otherwise> 340 <div id="gs-document-text"> 341 <xsl:call-template name="documentNodeText"/> 342 </div> 343 </gsf:otherwise> 344 </gsf:switch> 254 345 </div> 255 346 </gsf:template> -
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/README.txt
r37645 r37649 1 2 #---- 3 # To be able to convert docx to pdf 4 #---- 5 6 # headless/minimal version 7 8 sudo apt-get --no-install-recommends install libreoffice 9 10 11 #---- 12 # To process Excel spreadsheet and download Google drive 13 #---- 1 14 2 15 pip install openpyxl 3 16 pip install gdown 17 18 pip install requests 4 19 5 20 -
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py
r37646 r37649 8 8 import gdown 9 9 import openpyxl 10 #import urllib.request 11 10 11 # import urllib.request 12 12 # from urllib.request import urlopen 13 13 … … 23 23 # The '/' on the end is important for gdown 24 24 downloads_outputdir = "downloads/" 25 26 #import ssl27 #ssl._create_default_https_context = ssl._create_unverified_context28 25 29 26 … … 169 166 170 167 ofile = url.rsplit('/', 1)[-1] # everything after the last '/' 168 if ofile == "": 169 # url ended in a directory separator 170 # assume web server serves up default directory listing (e.g. index.html) page 171 ofile="index.html" 172 171 173 file_ext = os.path.splitext(ofile)[1] 172 174 173 175 # Looking for a simple file extension to the URL, otherwise assume HTML 174 176 … … 210 212 return download_filename 211 213 214 def optConvertDocToPdf(src_filename): 215 216 # Default assumption, no conversion 217 # 218 dst_filename = src_filename 219 220 src_dirname = os.path.dirname(src_filename) 221 src_filename_root, file_ext = os.path.splitext(src_filename) 222 223 lc_file_ext = file_ext.lower() 224 225 if lc_file_ext == ".docx" or lc_file_ext == ".doc": 226 227 dst_filename = src_filename_root + ".pdf" 228 229 cmd = "soffice --headless --convert-to pdf \"" + src_filename + "\" --outdir \"" + src_dirname + "\"" 230 status = os.system(cmd) 231 232 if status == 0: 233 os.remove(src_filename) 234 235 return dst_filename 236 237 212 238 def fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str): 213 239 … … 260 286 orig_filename_colpos = HeadingsTransformation["OrigFilename"]["colpos"] 261 287 worksheet.cell(row=row_i, column=orig_filename_colpos, value=orig_file) 288 289 # If a doc/docx file, convert it to PDF with open-/ libre-office 290 # (assumed to be installed, otherwise conversion fails, 291 # and we continue with doc/docx file) 292 # 293 downloaded_filename = optConvertDocToPdf(downloaded_filename) 262 294 263 295 renamed_filename = 
fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str)
Note:
See TracChangeset
for help on using the changeset viewer.