-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstep2preprocessingRawFeature.pl
More file actions
44 lines (38 loc) · 1.15 KB
/
step2preprocessingRawFeature.pl
File metadata and controls
44 lines (38 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
use strict;
use warnings;

# Pre-process the raw drug/feature table.
#
# Reads the tab-separated file "drugFeature.txt" (first line = column headers)
# and writes three files into the current directory:
#
#   drugFeatureP.csv     - comma-separated table: the first two header names,
#                          then one anonymised column "v1".."vN" per selected
#                          feature; each data row starts with "d<row number>"
#                          followed by column 1 and the selected feature columns.
#   featureVariables.txt - "v<k><TAB><original header>" for each selected feature.
#   drugList.txt         - "d<row number><TAB><first column of that row>".
#
# A header (column index >= 2) is selected as a feature when it does NOT consist
# solely of the ASCII characters accepted by $ASCII_ONLY_RE and is not one of
# the single punctuation tokens in %PUNCTUATION_TOKEN.
#
# NOTE: "use utf8" is deliberately not enabled and no encoding layers are used,
# so the literals below and the input are compared as raw bytes, exactly as in
# the original script.  Presumably the input file uses the same encoding as
# this source file -- TODO confirm.

my $INPUT_FILE    = 'drugFeature.txt';
my $CSV_FILE      = 'drugFeatureP.csv';
my $FEATURE_FILE  = 'featureVariables.txt';
my $DRUG_FILE     = 'drugList.txt';

# Headers made up only of these characters are skipped.
# NOTE(review): "\+-\[" inside the class is a range from '+' to '[', so it also
# admits '=', '?' and '@' (and covers '-').  Kept byte-for-byte because changing
# it would change which columns are selected -- confirm whether that is intended.
my $ASCII_ONLY_RE = qr/^[\sA-Za-z0-9\.\(\):;,\"\'%\>\<\*\#\+-\[\]\\\/]*$/;

# Headers that are exactly one of these tokens are skipped as well.
my %PUNCTUATION_TOKEN = map { $_ => 1 } (
    "。", ",", "?", "!", ":", ";", "¥", "〉", "《", "〈", "》", "(", ")", "、",
);

# Returns true when the given header text should be kept as a feature column.
sub is_feature_header {
    my ($header) = @_;
    return 0 if $header =~ $ASCII_ONLY_RE;
    return 0 if $PUNCTUATION_TOKEN{$header};
    return 1;
}

open my $in, '<', $INPUT_FILE
    or die "Cannot open $INPUT_FILE for reading: $!";

my $header_line = <$in>;
defined $header_line
    or die "$INPUT_FILE is empty: no header line found\n";
chomp $header_line;
my @headers = split /\t/, $header_line;

open my $out, '>', $CSV_FILE
    or die "Cannot open $CSV_FILE for writing: $!";
print {$out} $headers[0] // '', ",", $headers[1] // '';

# Index 1 is always emitted in data rows (it matches the second header name
# printed above); selected feature columns follow from position 1 onwards.
my @valid_idx = (1);
for my $col (2 .. $#headers) {
    push @valid_idx, $col if is_feature_header($headers[$col]);
}

open my $fet, '>', $FEATURE_FILE
    or die "Cannot open $FEATURE_FILE for writing: $!";
for my $pos (1 .. $#valid_idx) {
    print {$out} ",v", $pos;
    print {$fet} "v", $pos, "\t", $headers[ $valid_idx[$pos] ], "\n";
}
print {$out} "\n";
close $fet or die "Cannot close $FEATURE_FILE: $!";

open my $drug, '>', $DRUG_FILE
    or die "Cannot open $DRUG_FILE for writing: $!";
my $drug_num = 1;
while (my $line = <$in>) {
    chomp $line;
    my @fields = split /\t/, $line;

    # Rows shorter than the header (split drops trailing empty fields) yield
    # undef cells; emit them as empty strings, as the original did implicitly.
    print {$drug} "d", $drug_num, "\t", $fields[0] // '', "\n";
    print {$out} join(",", "d$drug_num", map { $_ // '' } @fields[@valid_idx]), "\n";

    $drug_num++;
}

# Buffered write errors only surface at close, so check the write handles.
close $drug or die "Cannot close $DRUG_FILE: $!";
close $in;
close $out  or die "Cannot close $CSV_FILE: $!";