1 | #!/usr/bin/env node
|
---|
2 |
|
---|
// Make sure you have created a '.env' file in the top-level of
// this NodeJS project, and set the following lines accordingly
// with the relevant values from your OpenAI account:
//
//   OPENAI_API_KEY=sk-????????????????????????????????????????????????
//   ORGANIZATION_ID=org-????????????????????????
//   ASSISTANT_ID=asst_????????????????????????
|
---|
10 |
|
---|
11 |
|
---|
// For an (alternative) approach that looks like it retrieves the already created Assistant directly, see:
//   https://gist.github.com/drifterz13/0cbe93ced5dc7958d7841a29c1721d1c
|
---|
14 |
|
---|
15 |
|
---|
// Looks like I need a more recent version of NodeJS than the one currently provided
// in Greenstone's selfcontained-nodejs
// // const { parseArgs } = require('node:util');
// const util = require('node:util');
|
---|
20 |
|
---|
21 |
|
---|
22 | const dotenv = require('dotenv').config();
|
---|
23 | const path = require("path");
|
---|
24 | const fs = require("fs");
|
---|
25 |
|
---|
26 | const openai = require('openai');
|
---|
27 |
|
---|
// Shared OpenAI API client used by every function in this script that talks
// to OpenAI.  Credentials are read from the process environment.
const myopenai = new openai.OpenAI({
    apiKey: process.env.OPENAI_API_KEY,
    organization: process.env.ORGANIZATION_ID
});
|
---|
32 |
|
---|
33 |
|
---|
/**
 * Return a promise that resolves (with no value) after the given delay.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms)
{
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
---|
40 |
|
---|
41 |
|
---|
/**
 * Print the command-line usage message to stderr and terminate the process
 * with exit status 1.
 *
 * @param {string} progname - Path of the running script; only its basename
 *                            is shown in the message.
 */
function printUsage(progname)
{
    // Declared locally: a bare assignment here would create an implicit global
    // (and throws a ReferenceError in strict mode / ES modules).
    const progname_tail = path.basename(progname);

    console.error("Usage: " + progname_tail + " [-site site-name] collect-name json-assistant-config-filename");
    process.exit(1);
}
|
---|
49 |
|
---|
//
// Based off the standalone solution given on StackOverflow
//   https://stackoverflow.com/questions/41462606/get-all-files-recursively-in-directories-nodejs
//
/**
 * Recursively walk a directory, appending the full path of every file found
 * to output_full_filenames.
 *
 * @param {string} input_full_dirname - Directory to walk.
 * @param {string[]} output_full_filenames - Array that matching file paths are pushed onto.
 * @param {RegExp|null} [opt_regex] - When given, only files whose name (not
 *        full path) matches are collected.  Directories are always descended
 *        into, regardless of the pattern.
 *
 * Errors are reported on stderr rather than thrown: an unreadable directory
 * is skipped, and an entry that cannot be stat'ed is skipped without
 * abandoning the rest of its directory.
 */
function listFilesRec(input_full_dirname,output_full_filenames,opt_regex)
{
    let filenames_and_dirnames;

    try {
        // Note: the filter regex is NOT a readdirSync option; it is applied
        // per file below.
        filenames_and_dirnames = fs.readdirSync(input_full_dirname);
    }
    catch (e) {
        console.error("Unable to read directory: " + input_full_dirname);
        return;
    }

    for (const filename_or_dirname of filenames_and_dirnames) {

        const full_filename_or_dirname = path.join(input_full_dirname, filename_or_dirname);

        let is_directory;
        try {
            is_directory = fs.statSync(full_filename_or_dirname).isDirectory();
        }
        catch (e) {
            // e.g. a dangling symlink, or an entry removed while we are walking
            console.error("Unable to stat: " + full_filename_or_dirname);
            continue;
        }

        if (is_directory) {
            listFilesRec(full_filename_or_dirname,output_full_filenames,opt_regex);
        }
        else if ((opt_regex == null) || opt_regex.test(filename_or_dirname)) {
            output_full_filenames.push(full_filename_or_dirname);
        }
    }
}
|
---|
80 |
|
---|
/**
 * Collect the full paths of all files found (recursively) under a directory.
 *
 * @param {string} full_dirname - Directory to search.
 * @param {RegExp|null} [opt_regex] - Optional filter handed to listFilesRec().
 * @returns {string[]} Full paths of the files found (empty if there are none).
 */
function listAllFiles(full_dirname,opt_regex)
{
    const all_files = [];

    listFilesRec(full_dirname,all_files,opt_regex);

    return all_files;
}
|
---|
94 |
|
---|
/**
 * Parse the script's command-line arguments.
 *
 * Two forms are accepted:
 *   -site site-name collect-name json-assistant-config-filename   (4 arguments)
 *   collect-name json-assistant-config-filename                    (2 arguments)
 * Anything else prints the usage message via printUsage().
 *
 * @param {string} progname - Script path, used only for the usage message.
 * @param {string[]} cmdline_args - Arguments following the script name.
 * @returns {Array} [site_name, coll_name, json_assistant_config_filename];
 *          site_name defaults to "localsite" when -site is not given.
 */
function parseCommandLineArgs(progname,cmdline_args)
{
    let site_name = "localsite";
    let coll_name = null;
    let json_assistant_config_filename = null;

    if (cmdline_args.length === 4) {
        // looks like we're in the form: -site xxx collect-name config-file
        if (cmdline_args[0] === "-site") {
            site_name = cmdline_args[1];
        }
        else {
            printUsage(progname);
        }

        coll_name = cmdline_args[2];
        json_assistant_config_filename = cmdline_args[3];
    }
    else if (cmdline_args.length === 2) {
        // no site given, default to localsite.  This form has TWO arguments
        // (collection name and config file), so the length test must be 2.
        coll_name = cmdline_args[0];
        json_assistant_config_filename = cmdline_args[1];
    }
    else {
        printUsage(progname);
    }

    return [site_name,coll_name,json_assistant_config_filename];
}
|
---|
126 |
|
---|
/**
 * List the '.html' files under the collection's 'export' directory.
 *
 * @param {Object} coll_info - Collection details; only coll_info.collect_dir is read.
 * @returns {string[]} Full paths of the files found by listAllFiles().
 */
function getExportedFiles(coll_info)
{
    const full_collect_dirname = coll_info.collect_dir;
    const full_export_dirname = path.join(full_collect_dirname,"export");

    return listAllFiles(full_export_dirname,/\.html$/);
}
|
---|
136 |
|
---|
/**
 * Make sure the collection's 'tmp' directory exists, creating it if needed.
 *
 * @param {Object} coll_info - Collection details; only coll_info.collect_dir is read.
 * @throws {Error} If the directory cannot be created.
 */
function createSiteCollectTmpDir(coll_info)
{
    const full_collect_dirname = coll_info.collect_dir;

    const full_collect_tmp_dirname = path.join(full_collect_dirname,"tmp");

    if (!fs.existsSync(full_collect_tmp_dirname)) {
        console.log("Creating collection 'tmp' directory");
        // Synchronous on purpose: the directory must exist before this
        // function returns (callback-style fs.mkdir without a callback throws).
        fs.mkdirSync(full_collect_tmp_dirname);
    }
}
|
---|
148 |
|
---|
/**
 * Copy a file into the collection's 'tmp' directory under a name that
 * encodes the site and collection it belongs to.
 *
 * @param {string} src_full_filename - Full path of the file to copy.
 * @param {Object} coll_info - Reads site_name, coll_name and collect_dir.
 * @returns {string} Full path of the copy inside the 'tmp' directory.
 * @throws {Error} Whatever fs.copyFileSync throws if the copy fails.
 */
function copyToSiteCollectTmp(src_full_filename, coll_info)
{
    // While OpenAI generates unique IDs for every file stored, its
    // associated (metadata) filename is only the last part (basename)
    // of the full filename.
    //
    // This function exists to make a copy of the file that is being
    // imported, storing it in the collection's 'tmp' folder.  In
    // making the copy, it changes the filename in tmp to be one that
    // encodes the site and collect names.

    const site_name = coll_info.site_name;
    const coll_name = coll_info.coll_name;
    const full_collect_dirname = coll_info.collect_dir;

    const src_filename = path.basename(src_full_filename);

    // In encoding the site and collect name into the destination file,
    // we have chosen to do this with '|' as a separator character.
    //
    // As this is a more unusual character to include in a filename, we
    // document the decision here.  It is done this way so that when we
    // look through all the files returned by OpenAI from its
    // file-store (which, recall, uses only the tail-part of a
    // filename) we can track which site/collect a given file-id
    // entry was for.

    const dst_filename = site_name + "|" + coll_name + "|" + src_filename;

    const dst_full_filename = path.join(full_collect_dirname,"tmp",dst_filename);

    // Any copy failure propagates to the caller unchanged.
    fs.copyFileSync(src_full_filename, dst_full_filename);
    console.log(` ${src_filename} was copied as ${dst_filename} to collection's tmp directory`);

    return dst_full_filename;
}
|
---|
192 |
|
---|
/**
 * Delete from the OpenAI file store every file whose stored filename starts
 * with the '<site_name>|<coll_name>|' prefix written by copyToSiteCollectTmp().
 *
 * @param {Object} coll_info - Reads site_name and coll_name.
 * @returns {Promise<void>}
 */
async function purgeOpenaiSiteCollectFiles(coll_info)
{
    const site_name = coll_info.site_name;
    const coll_name = coll_info.coll_name;

    // Literal prefix comparison, deliberately NOT a RegExp: with '|' left
    // unescaped the pattern becomes an alternation whose empty last branch
    // matches every filename, which would delete ALL stored files.
    const site_coll_prefix = `${site_name}|${coll_name}|`;

    const stored_openai_files = await myopenai.files.list();

    console.log(`Deleting OpenAI stored files whose prefix matches '${site_name}|${coll_name}|'`);

    for await (const openai_file of stored_openai_files) {

        const openai_filename = openai_file.filename;

        if ((typeof openai_filename === "string") && openai_filename.startsWith(site_coll_prefix)) {
            console.log(` Deleting OpenAI Stored File ID: ${openai_file.id}`);
            await myopenai.files.del(openai_file.id);

            // Pause between delete requests only; there is nothing to
            // throttle for files that are skipped.
            await sleep(500);
        }
    }
}
|
---|
219 |
|
---|
/**
 * Upload one exported file to the OpenAI file store and record its file id.
 *
 * The file is first copied into the collection's 'tmp' directory under a
 * site/collection-encoded name (see copyToSiteCollectTmp), uploaded from
 * there, and the temporary copy is then removed.
 *
 * @param {Object} coll_info - Reads gsdl3srchome (plus whatever copyToSiteCollectTmp reads).
 * @param {string} full_filename - Full path of the file to upload.
 * @param {string[]} output_site_collect_openai_file_ids - Array the new OpenAI file id is pushed onto.
 * @returns {Promise<void>}
 */
async function addOpenaiSiteCollectFile(coll_info,full_filename,output_site_collect_openai_file_ids)
{
    const gsdl3srchome = coll_info.gsdl3srchome;

    // For display only: abbreviate the install prefix.  Done as a plain string
    // prefix test so that regex metacharacters in the path (e.g. '.', '\', '+')
    // cannot change the meaning of the match.
    const gsdl_full_filename = full_filename.startsWith(gsdl3srchome)
        ? '$GSDL3SRCHOME' + full_filename.slice(gsdl3srchome.length)
        : full_filename;

    console.log(` ${gsdl_full_filename}`);

    const tmp_site_collect_full_filename = copyToSiteCollectTmp(full_filename, coll_info);

    let openai_file;
    try {
        openai_file = await myopenai.files.create({
            file: fs.createReadStream(tmp_site_collect_full_filename),
            purpose: "assistants"
        });
    }
    finally {
        // Remove the temporary copy whether or not the upload succeeded
        fs.unlinkSync(tmp_site_collect_full_filename);
    }

    console.log("Pushing OpenID file id: " + openai_file.id);
    output_site_collect_openai_file_ids.push(openai_file.id);
}
|
---|
251 |
|
---|
/**
 * Upload each exported file to OpenAI, one at a time, pausing 500ms between
 * uploads.
 *
 * @param {string[]} all_exported_files - Full paths of the files to upload.
 * @param {Object} coll_info - Reads site_name and coll_name for the progress
 *        message; passed through to addOpenaiSiteCollectFile().
 * @returns {Promise<string[]>} The OpenAI file ids, in upload order.
 */
async function addOpenaiSiteCollectFiles(all_exported_files,coll_info)
{
    const site_name = coll_info.site_name;
    const coll_name = coll_info.coll_name;

    const site_collect_openai_file_ids = [];

    console.log(`Uploading exported ${site_name}/collect/${coll_name} files to OpenAI:`);

    // Sequential on purpose (for...of with await, not forEach with an async
    // callback): each upload finishes before the next one starts.
    for (const full_filename of all_exported_files) {
        await addOpenaiSiteCollectFile(coll_info,full_filename,site_collect_openai_file_ids);
        await sleep(500);
    }

    return site_collect_openai_file_ids;
}
|
---|
303 |
|
---|
304 |
|
---|
/**
 * Create an OpenAI Assistant with the 'retrieval' tool enabled and the given
 * uploaded files attached.
 *
 * @param {string[]} site_collect_openai_file_ids - OpenAI file ids to attach.
 * @param {Object} coll_info - Reads site_name and coll_name (stored as assistant metadata).
 * @param {Object} assistant_config - Reads name, description, base_model and instructions.
 * @returns {Promise<Object>} The assistant object returned by the OpenAI API.
 */
async function createAssistantWithFiles(site_collect_openai_file_ids, coll_info, assistant_config)
{
    const site_name = coll_info.site_name;
    const coll_name = coll_info.coll_name;

    const assistant_name = assistant_config.name;
    const assistant_desc = assistant_config.description;
    const base_model     = assistant_config.base_model;    // e.g. "gpt-4-turbo-preview"
    const instructions   = assistant_config.instructions;

    console.log("====");
    console.log("file ids = ");
    console.log(site_collect_openai_file_ids);
    console.log("====");

    const assistant = await myopenai.beta.assistants.create({
        name         : assistant_name,
        description  : assistant_desc,
        model        : base_model,
        instructions : instructions,
        tools: [
            //{ type: "code_interpreter" }, // Code interpreter tool, calculations
            { type: 'retrieval' }
        ],
        file_ids : site_collect_openai_file_ids,
        metadata : { greenstone3: true, siteName: site_name, collectionName: coll_name }
    });

    console.log('Assistant has been created: ', assistant);

    return assistant;
}
|
---|
343 |
|
---|
/**
 * Read and parse the JSON assistant configuration file.
 *
 * @param {string} json_config_full_filename - Path of the JSON file.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the file cannot be read or is not valid JSON; the
 *         message names the offending file.
 */
function readAssistantConfig(json_config_full_filename)
{
    let data_str;

    try {
        data_str = fs.readFileSync(json_config_full_filename, "utf8");
    }
    catch (err) {
        throw new Error("Unable to read assistant config file '" + json_config_full_filename + "': " + err.message);
    }

    try {
        return JSON.parse(data_str);
    }
    catch (err) {
        throw new Error("Invalid JSON in assistant config file '" + json_config_full_filename + "': " + err.message);
    }
}
|
---|
351 |
|
---|
/**
 * Script entry point.
 *
 * Steps: check GSDL3SRCHOME, parse the command line, read the assistant
 * config, make sure the collection's 'tmp' directory exists, purge this
 * site/collection's previously uploaded OpenAI files, upload the exported
 * files, and create an Assistant with those files attached.
 *
 * @returns {Promise<void>}
 */
async function main()
{
    // check for GSDL3SRCHOME
    const gsdl3srchome = process.env.GSDL3SRCHOME;

    if (!gsdl3srchome) {
        console.error("Environment variable GSDL3SRCHOME is not set");
        process.exit(1);
    }

    const progname = process.argv[1];
    const cmdline_args = process.argv.slice(2);

    const [site_name,coll_name,json_assistant_config_filename] = parseCommandLineArgs(progname,cmdline_args);

    const assistant_config = readAssistantConfig(json_assistant_config_filename);

    const full_collect_dirname = path.join(gsdl3srchome,"web","sites",site_name,"collect",coll_name);

    const coll_info = {
        "gsdl3srchome": gsdl3srchome,
        "site_name"   : site_name,
        "coll_name"   : coll_name,
        "collect_dir" : full_collect_dirname
    };

    createSiteCollectTmpDir(coll_info);

    // Only the first max_upload_files exported files are uploaded.
    // NOTE(review): the reason for the cap of 40 is not recorded in this
    // file -- confirm before changing it.
    const max_upload_files = 40;
    const all_exported_files = getExportedFiles(coll_info).slice(0, max_upload_files);

    await purgeOpenaiSiteCollectFiles(coll_info);

    // Now add in all the newly exported files
    const site_collect_openai_file_ids = await addOpenaiSiteCollectFiles(all_exported_files,coll_info);

    await createAssistantWithFiles(site_collect_openai_file_ids,coll_info,assistant_config);
}
|
---|
413 |
|
---|
414 |
|
---|
// Run the script.  Any error escaping main() is reported and turned into a
// non-zero exit status rather than being left as an unhandled rejection.
main().catch((err) => {
    console.error(err);
    process.exit(1);
});
|
---|
416 |
|
---|