Changeset 38783
- Timestamp:
- 2024-02-25T00:15:29+13:00 (2 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-installations/thewillow/trunk/sites/thewillow/dlcol-chatgpt/create-assistant/main.js
r38781 r38783 37 37 38 38 39 function listFilesRec(input_full_dirname,output_full_filenames )39 function listFilesRec(input_full_dirname,output_full_filenames,opt_regex) 40 40 { 41 41 try { 42 let filenames_and_dirnames = fs.readdirSync(input_full_dirname );42 let filenames_and_dirnames = fs.readdirSync(input_full_dirname,opt_regex); 43 43 44 44 filenames_and_dirnames.forEach(filename_or_dirname => { 45 45 46 46 const full_filename_or_dirname = path.join(input_full_dirname, filename_or_dirname); 47 47 48 48 if (fs.statSync(full_filename_or_dirname).isDirectory()) { 49 49 const inner_full_dirname = full_filename_or_dirname; 50 listFilesRec(inner_full_dirname,output_full_filenames );50 listFilesRec(inner_full_dirname,output_full_filenames,opt_regex); 51 51 } 52 52 else { 53 const inner_full_filename = full_filename_or_dirname; 54 output_full_filenames.push(inner_full_filename); 53 if ((opt_regex == null) || opt_regex.test(filename_or_dirname)) { 54 55 const inner_full_filename = full_filename_or_dirname; 56 output_full_filenames.push(inner_full_filename); 57 } 55 58 } 56 59 }); … … 63 66 // upgrade to include a regex to filter dirs/filenames 64 67 65 function listAllFiles(full_dirname )68 function listAllFiles(full_dirname,opt_regex) 66 69 { 67 70 let all_files = []; 68 71 69 listFilesRec(full_dirname,all_files); 70 72 73 //const opt_regex = (opt_regex_str != null) ? new RegExp(opt_regex_str) : null; 74 75 listFilesRec(full_dirname,all_files,opt_regex); 76 71 77 return all_files; 72 78 } 73 79 74 75 76 77 // check for GSDL3SRCHOME 78 const gsdl3srchome = process.env.GSDL3SRCHOME 79 80 if (!gsdl3srchome) { 81 console.error("Environment variable GSDL3SRCHOME is not set"); 82 process.exit(1); 83 } 84 85 const progname = process.argv[1]; 86 const cmdline_args = process.argv.slice(2); 87 88 89 let site_name = "localsite"; 90 let coll_name = null; 91 92 if (cmdline_args.length == 3) { 93 // looks like we're in the form, -site xxx col 94 if (cmdline_args[0] == "-site") { 95 site_name = cmdline_args[1]; 80 function parseCommandLineArgs(progname,cmdline_args) 81 { 82 83 let site_name = "localsite"; 84 let coll_name = null; 85 86 if (cmdline_args.length == 3) { 87 // looks like we're in the form, -site xxx col 88 if (cmdline_args[0] == "-site") { 89 site_name = cmdline_args[1]; 90 } 91 else { 92 printUsage(progname); 93 } 94 95 coll_name = cmdline_args[2]; 96 } 97 else if (cmdline_args.length == 1) { 98 // no site given, default to localsite 99 coll_name = cmdline_args[0]; 96 100 } 97 101 else { … … 99 103 } 100 104 101 coll_name = cmdline_args[2]; 102 } 103 else if (cmdline_args.length == 1) { 104 // no site given, default to localsite 105 coll_name = cmdline_args[0]; 106 } 107 else { 108 printUsage(progname); 109 } 110 111 const full_export_dirname = path.join(gsdl3srchome,"web","sites",site_name,"collect",coll_name,"export"); 112 113 114 let all_exported_files = listAllFiles(full_export_dirname); 115 console.log(all_exported_files); 116 117 118 119 120 /* 121 const file = await openai.files.create({ 122 file: fs.createReadStream("mydata.csv"), 123 purpose: "assistants", 124 }); 125 */ 105 return [site_name,coll_name]; 106 } 107 108 function getExportedFiles(coll_info) 109 { 110 const full_collect_dirname = coll_info.collect_dir; 111 112 const full_export_dirname = path.join(full_collect_dirname,"export"); 113 let all_exported_files = listAllFiles(full_export_dirname,/\.html$/); 114 115 return all_exported_files 116 } 117 118 function createSiteCollectTmpDir(coll_info) 119 { 120 const full_collect_dirname = coll_info.collect_dir; 121 122 const full_collect_tmp_dirname = path.join(full_collect_dirname,"tmp"); 123 124 if (!fs.existsSync(full_collect_tmp_dirname)) { 125 console.log("Creating collection 'tmp' directory"); 126 fs.mkdir(full_collect_tmp_dirname); 127 } 128 } 129 130 function copyToSiteCollectTmp(src_full_filename, coll_info) 131 { 132 // While OpenAI generates unique IDs for every file stored, its 133 // associated (metadata) filename is only the last part (basename) 134 // of the full filename 135 136 // This function exists make a copy of the file that is being 137 // imported storing it in the collection's 'tmp' folder. In 138 // making the copy, it changes the filename in tmp to be one that 139 // encode the site and collect names 140 141 const site_name = coll_info.site_name; 142 const coll_name = coll_info.coll_name; 143 const full_collect_dirname = coll_info.collect_dir; 144 145 const src_filename = path.basename(src_full_filename); 146 147 // In encoding the site and collect name into the destination file, 148 // we have chosen to do this with '|' as a separator character, 149 // suitable escaped, of course, where needed! 150 // 151 // As this is a more unusual character to include in a file, we 152 // document the discision here. It is done so this way so when we 153 // look through all the files returned by openai form its 154 // file-store (which, recall, uses only the tail-part of a 155 // filename) we can can track which site/collect a given file-id 156 // entry was fore. 157 158 //const dst_filename = site_name + "\\/" + coll_name + "\\/" + src_filename; 159 const dst_filename = site_name + "|" + coll_name + "|" + src_filename; 160 161 const dst_full_filename = path.join(full_collect_dirname,"tmp",dst_filename); 162 163 fs.copyFile(src_full_filename, dst_full_filename, (err) => { 164 if (err) throw err; 165 console.log(`${src_filename} was copied as ${dst_filename} to collection's tmp directory`); 166 }); 167 168 return dst_full_filename; 169 } 170 171 async function purgeOpenaiSiteCollectFiles(coll_info) 172 { 173 174 const site_name = coll_info.site_name; 175 const coll_name = coll_info.coll_name; 176 177 //const site_coll_regex = new RegExp(`^${site_name}\/${coll_name}\/`); 178 const site_coll_regex = new RegExp(`^${site_name}\|${coll_name}\|`); 179 180 const stored_openai_files = await myopenai.files.list(); 181 console.log(stored_openai_files); 182 183 for await (const openai_file of stored_openai_files) { 184 185 const openai_filename = openai_file.filename; 186 187 if (site_coll_regex.test(openai_filename)) { 188 console.log(` Deleting OpenAI Stored File ID: ${openai_file.id}`); 189 const file = await myopenai.files.del(openai_file.id); 190 } 191 } 192 } 193 194 195 196 async function main() 197 { 198 // check for GSDL3SRCHOME 199 const gsdl3srchome = process.env.GSDL3SRCHOME 200 201 if (!gsdl3srchome) { 202 console.error("Environment variable GSDL3SRCHOME is not set"); 203 process.exit(1); 204 } 205 206 const progname = process.argv[1]; 207 const cmdline_args = process.argv.slice(2); 208 209 const [site_name,coll_name] = parseCommandLineArgs(progname,cmdline_args); 210 211 const full_collect_dirname = path.join(gsdl3srchome,"web","sites",site_name,"collect",coll_name); 212 213 const coll_info = { 214 "gsdl3srchome": gsdl3srchome, 215 "site_name" : site_name, 216 "coll_name" : coll_name, 217 "collect_dir" : full_collect_dirname 218 }; 219 220 createSiteCollectTmpDir(coll_info); 221 222 let all_exported_files = getExportedFiles(coll_info) 223 //all_exported_files = [ all_exported_files[0] ]; 224 225 await purgeOpenaiSiteCollectFiles(coll_info); 226 227 const regex_replace = new RegExp('^'+gsdl3srchome); 228 let openai_files = []; 229 230 console.log(`Uploading exported ${site_name}/collect/${coll_name} files to OpenAI:`) 231 all_exported_files.forEach(async function(full_filename) { 232 const gsdl_full_filename = full_filename.replace(regex_replace,'$GSDL3SRCHOME'); 233 234 console.log(` ${gsdl_full_filename}`); 235 236 const tmp_site_collect_full_filename = copyToSiteCollectTmp(full_filename, coll_info) 237 238 239 const openai_file = await myopenai.files.create({ 240 file: fs.createReadStream(tmp_site_collect_full_filename), 241 purpose: "assistants" 242 }); 243 244 245 openai_files.push(openai_file); 246 }); 247 } 248 249 250 main(); 251
Note:
See TracChangeset
for help on using the changeset viewer.