source: trunk/protemix/rename.pl@ 3162

Last change on this file since 3162 was 3162, checked in by sjboddie, 22 years ago

Initial revision

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1#!/usr/bin/perl -w
2
3
4BEGIN {
5 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
6 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
7}
8
9
10use File::Basename;
11use unicode;
12
13my %classifications = (
14 'Pharmacokinetics' => ['Absorption', 'Distribution', 'Metabolism', 'Elimination',
15 'Effect of Food', 'Drug Interactions', 'Special Populations'],
16 'Pharmacodynamics' => ['Trace Metal', 'Excretion', 'Selectivity', 'Copper', 'Zinc',
17 'Manganese', 'Iron', 'Distribution', 'Tissue', 'Brain',
18 'Heart', 'Plasma', 'Fetal', 'Liver', 'Kidney'],
19 'Safety/Side Effects/Toxicity' => ['Acute Toxicity', 'Repeated Dose Toxicity',
20 'Studies in Rodents', 'Studies in Dogs',
21 'Overdosage', 'Reproductive Toxicity',
22 'Teratogenicity', 'Mutagenecity', 'Carcinogenicity'],
23 'Other' => ['Superoxide Dismutase', 'Ferroxidase I', 'Ferroxidase II',
24 'Metallothienein', 'Ceruloplasmin'],
25 'Efficacy' => ['Heart', 'Complications', 'Microvascular', 'Renal', 'Diabetes'],
26 'Contraindications' => [],
27 'Precautions' => []
28 );
29
30my @meta_files = ();
31
32&recursive_rename("import");
33# process meta files
34foreach my $mfile (@meta_files) {
35 &process_meta_file($mfile);
36}
37
38sub recursive_rename {
39 my ($dir) = @_;
40
41 opendir (DIR, "$dir") || die;
42 my @files = readdir DIR;
43 closedir DIR;
44
45 foreach $file (@files) {
46 next if $file eq "." || $file eq "..";
47 next if $file =~ /processed.htm$/;
48
49 my $path = "$dir/$file";
50 if (-d $path) {
51 &recursive_rename($path);
52 } else {
53 $newfile = &rename_file($file);
54 if ($newfile ne $file) {
55 print STDERR "renaming $file --> $newfile\n";
56 `mv "$path" "$dir/$newfile"`;
57 }
58
59 if ($file =~ /^title\.txt$/) {
60 # we'll process all the title.txt files after we've finished
61 # renaming everything
62 push(@meta_files, "$dir/$newfile");
63 }
64
65 if ($file =~ /\.html?$/i) {
66 &process_html_file("$dir/$newfile");
67 }
68 }
69 }
70}
71
72# clean up the html (currently just use the non-css version)
73sub process_html_file {
74 my ($filename) = @_;
75
76 print STDERR "processing $filename\n";
77
78 open (FILE, $filename) || die;
79 undef $/;
80 my $file = <FILE>;
81 $/ = "\n";
82 close FILE;
83
84 my $header = "<html>\n<head></head>\n<body bgcolor=\"#FFFFFF\">\n";
85 my $footer = "</body>\n</html>\n";
86
87 my ($noncss, $css) = $file =~ /document\.write\(\"(.*?[^\\])\"\).*?document\.write\(\"(.*?[^\\])\"\)/si;
88
89 # remove backslashes added for javascript strings
90 $noncss =~ s/\\\"/\"/sg;
91 $noncss =~ s/\\\n/\n/sg;
92
93 # alter <img> tags to support renamed files
94 $noncss =~ s/(<img src=\")([^\"]+)/$1 . &rename_file($2)/sige;
95
96 # remove rules="all" attribute from table tags
97 $noncss =~ s/(<table.*?)rules=\"all\"\s+/$1/sig;
98
99 open (FILE, ">$filename") || die;
100 print FILE $header . $noncss . $footer;
101 close FILE;
102
103}
104
105sub rename_file {
106 my ($filename) = @_;
107
108 $filename =~ s/\s+//g;
109 $filename =~ s/^protemix\(\d+\)-?//;
110
111 return $filename;
112}
113
114# process a title.txt file and replace it with a meta.xml file
115sub process_meta_file {
116 my ($filename) = @_;
117
118 open (FILE, $filename) || die ("couldn't open $filename");
119 undef $/;
120 my $title = <FILE>;
121 $/ = "\n";
122 close FILE;
123
124 unlink($filename);
125
126 $title =~ s/\s+/ /gs;
127 $title =~ s/^\s+//;
128 $title =~ s/\s+$//;
129 $title = &unicode::ascii2utf8(\$title); # assumes title is iso-8859-1
130 my $metafile = "<Metafile>\n";
131 $metafile .= " <Metadata name=\"Title\">$title</Metadata>\n";
132
133 # currently just write some random values for classification metadata
134 my @c1 = ('Animal', 'Human', 'Other');
135 my $rand = int(rand 3);
136 my $classtext = " <Metadata name=\"Class1\">" . $c1[$rand] . "</Metadata>\n";
137 my @c2 = ('Pharmacokinetics', 'Pharmacodynamics', 'Safety/Side Effects/Toxicity',
138 'Other', 'Efficacy', 'Contraindications', 'Precautions');
139 $rand = int(rand 7);
140 my $c2val = $c2[$rand];
141 $classtext .= " <Metadata name=\"Class2\">$c2val</Metadata>\n";
142 if (defined $classifications{$c2val}) {
143 if (scalar(@{$classifications{$c2val}})) {
144 my $numvals = scalar(@{$classifications{$c2val}});
145 $rand = int(rand $numvals);
146 my $c3val = $classifications{$c2val}->[$rand];
147 $classtext .= " <Metadata name=\"Class3\">$c3val</Metadata>\n";
148 }
149 } else {
150 print STDERR "ERROR: '$c2val' not in clasifications list\n";
151 }
152
153 my $dir = File::Basename::dirname($filename);
154
155 opendir(DIR, $dir) || die;
156 my @files = readdir DIR;
157 foreach my $file (@files) {
158 if ($file =~ s/\.html?$//i) {
159 $metafile .= " <Page filename=\"$file\">\n$classtext </Page>\n";
160 }
161 }
162 $metafile .= "</Metafile>\n";
163
164 $filename = "$dir/meta.xml";
165 open (FILE, ">$filename") || die;
166 print FILE $metafile;
167 close FILE;
168}
169
170
Note: See TracBrowser for help on using the repository browser.