At work we were comparing two measurement techniques that involved two different instrument setups, one taking digital voltage threshold values and the other taking an analog resistance measurement to detect gross shorting in a circuit. The software interfaces to these instruments were written in LabVIEW a long, long time ago, and they did not output their data in similar formats. Both measurements were done on the same set of devices, and I wanted to match up the results between the two. My original Perl script for this was just a brute-force linear search between the two data files, looking for matching pairs and outputting the results in a new table file. It turns out that ~23,000 data points were a bit too much for such lazy programming, and I soon realized that this simple task was taking all day. After a friend showed me how quickly he could match the data sets using C# and LINQ, I realized that this script could do something similar if I used Perl's "sort" function. So here is the script for future reference.
use warnings;
use strict;
# Open the two input databases and the three output files.
# The bareword handle names are kept because the rest of the script refers
# to them.  Three-argument open keeps the mode separate from the file name,
# and every open is checked so that a missing or unwritable file stops the
# run immediately with the operating-system error, instead of surfacing
# later as reads from (or prints to) a closed handle.
open(DIGITAL, '<', 'spider_mask_defect_database.txt')
    or die "Cannot open spider_mask_defect_database.txt for reading: $!\n";
open(ANALOG, '<', 'spider_mask_resistance_database.txt')
    or die "Cannot open spider_mask_resistance_database.txt for reading: $!\n";
open(OUTPUT, '>', 'Spider_Mask_Measurement_Verification.txt')
    or die "Cannot open Spider_Mask_Measurement_Verification.txt for writing: $!\n";
open(OUTPUTA, '>', 'Analog_Data.txt')
    or die "Cannot open Analog_Data.txt for writing: $!\n";
open(OUTPUTD, '>', 'Digital_Data.txt')
    or die "Cannot open Digital_Data.txt for writing: $!\n";
print "Spider_Mask_Measurement_Validation_20101129: Script started...\n";

# Translation table: lot label as written in the resistance (analog)
# database => lot label used by the defect (digital) database.
my %LotNames = (
    "FDC25-001_Res"        => "FDC25-001_Retest2",
    "FDC25-002_Res"        => "FDC25-002_Retest",
    "FDC25-002_Shorts_Res" => "FDC25-002_Retest",
    "FDC25-003_Cont_Res"   => "FDC25-003",
    "FDC25-003_Shorts_Res" => "FDC25-003",
    "FDC25-004_Res"        => "FDC25-004",
    "FDC25-005_Res"        => "FDC25-005",
    "FDC25-006_Res"        => "FDC25-006",
    "Spider_Mask_Res"      => "Spider_Mask_PEN_Lot",
    "FDC25-007_Res"        => "FDC25-007",
);

# Pitch label => pitch as a plain number.
my %PitchNames = (
    "3um"  => 3,
    "6um"  => 6,
    "9um"  => 9,
    "12um" => 12,
);

# The first line of each input file is its column-header line; set both
# aside so that only data rows remain to be read from the handles.
my $DigitalHeader = <DIGITAL>;
my $AnalogHeader  = <ANALOG>;

# Shared loop counters.
my ($i, $j) = (0, 0);

# Row 0 of the output table holds the column titles.  The last title
# carries the newline that terminates the header row.
my @OutputMatrix = (
    [
        "Lot ID",
        "Wafer ID",
        "Device Pitch",
        "Row",
        "Column",
        "Device Site",
        "Analog Continuity",
        "Analog Shorts",
        "Digital Continuity",
        "Digital Shorts\n",
    ],
);

#
# Column-name correspondence (resistance database = defect database):
# "Lot ID"    = "Lot ID"
# "Wafer ID"  = "Wafer ID"
# "Device ID" = "Pitch"
# "Row"       = "Position X"
# "Column"    = "Position Y"
# "Site ID"   = "Site"
#
# The tables below number the columns from 1; the array indices used after
# split() elsewhere in this script are zero-based (column 1 is index 0).
#
# Resistance database columns:
# | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
# |Lot ID|Wafer ID|Device ID|Row|Column|Site ID|Resistance|Measurement Type|
#
# Defect database columns:
# | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
# |Lot ID|Wafer|Site|Position X|Position Y|Pitch|Continuity|Shorts|
#
# Read every remaining line of the digital database into memory, then
# release the handle.
my @DIGITALDATA = <DIGITAL>;
close(DIGITAL);

my @ANALOGDATA = ();
my @dataline = ();
# Use %LotNames to rename all analog lot labels to their digital counterparts.
while (my $line = <ANALOG>) {
    @dataline = split(/\t/, $line);

    # Look the lot name up BEFORE overwriting the field.  Assigning first
    # and testing afterwards would leave the field undefined by the time
    # the error message is built, so the offending name would never appear.
    my $analog_lot = defined $dataline[0] ? $dataline[0] : '';
    exists $LotNames{$analog_lot}
        or die "Lot '$analog_lot' (resistance database line $.) not listed in LotNames\n";

    $dataline[0] = $LotNames{$analog_lot};
    push(@ANALOGDATA, join("\t", @dataline));
}
close(ANALOG);

# An array is empty when it has no elements at all.  Testing the last index
# with "<= 0" would also reject a data set holding exactly one row.
if (!@ANALOGDATA) {
    print "ANALOGDATA is empty\n";
    exit(-1);
}
if (!@DIGITALDATA) {
    print "DIGITALDATA is empty\n";
    exit(-1);
}
print "Done loading database into memory. Starting data sorting and merging...\n";

# Both sorts below split each row exactly once, up front, and then compare
# the cached fields (decorate / sort / undecorate).  Splitting inside the
# comparator would repeat the same work for both rows on every key of every
# comparison.  The sort keys and their order are unchanged.

# Digital rows: Lot ID, Wafer and Site compared as strings, then Pitch,
# Position X and Position Y compared as numbers.
my @Sorted_Digital_Data =
    map  { $_->[0] }
    sort {
           $a->[1] cmp $b->[1]
        || $a->[2] cmp $b->[2]
        || $a->[3] cmp $b->[3]
        || $a->[4] <=> $b->[4]
        || $a->[5] <=> $b->[5]
        || $a->[6] <=> $b->[6]
    }
    map  { [ $_, (split /\t/, $_)[0, 1, 2, 5, 3, 4] ] } @DIGITALDATA;

# Analog rows: Lot ID, Wafer ID and Site ID compared as strings, then the
# numeric pitch taken from the Device ID ("<digits>um"), Row and Column
# compared as numbers, and finally Measurement Type as a string so that the
# rows belonging to one device end up next to each other.
my @Sorted_Analog_Data =
    map  { $_->[0] }
    sort {
           $a->[1] cmp $b->[1]
        || $a->[2] cmp $b->[2]
        || $a->[3] cmp $b->[3]
        || $a->[4] <=> $b->[4]
        || $a->[5] <=> $b->[5]
        || $a->[6] <=> $b->[6]
        || $a->[7] cmp $b->[7]
    }
    map  {
        my @field = split /\t/, $_;
        my $pitch;
        # Only trust the capture when the match succeeded; after a failed
        # match $1 does not describe the current row.
        if (defined $field[2] && $field[2] =~ /([0-9]+)um/) {
            $pitch = $1;
        }
        else {
            warn "No pitch found in analog Device ID; sorting row as pitch 0: $_";
            $pitch = 0;
        }
        [ $_, @field[0, 1, 5], $pitch, @field[3, 4, 7] ];
    } @ANALOGDATA;

# Print out sorted data into files for analysis
print OUTPUTD $DigitalHeader, @Sorted_Digital_Data;
print OUTPUTA $AnalogHeader, @Sorted_Analog_Data;
#
# Combine sorted databases into OutputMatrix then dump to file.
# OutputMatrix column labels are:
# | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
# | Lot ID | Wafer ID | Device Pitch | Row | Column | Device Site | Analog Continuity | Analog Shorts | Digital Continuity | Digital Shorts |
#
# The analog list is walked two rows at a time (one pair per device) while
# $i walks the digital list one row at a time.  A pair is merged with the
# current digital row when their Lot ID and Wafer ID agree.
#
# NOTE(review): within one (lot, wafer) group the pairing is purely
# positional -- Site, Pitch, Row and Column are not compared.  It therefore
# relies on both lists holding the same devices in the same order, and on
# the Measurement Type sort placing the continuity row before the shorts
# row of each device.  Confirm against real data.
#
my $k = 1;
$i = 0;
PAIR:
for ($j = 0; $j <= $#Sorted_Analog_Data; $j = $j + 2) {
    # Split each analog row of the pair once.
    my @analog_first  = split(/\t/, $Sorted_Analog_Data[$j]);
    my @analog_second = ();
    if ($j + 1 <= $#Sorted_Analog_Data) {
        @analog_second = split(/\t/, $Sorted_Analog_Data[$j + 1]);
    }
    else {
        # Odd row count: do not read past the end of the analog list.
        warn "Analog data has an odd number of rows; the last device has no second measurement\n";
    }

    # Both lists are sorted by Lot ID, then Wafer ID (string order).  Skip
    # digital rows whose (lot, wafer) sorts before the current analog pair;
    # leaving $i parked on such a row would prevent every later pair from
    # ever matching.
    my $order = -1;
    while ($i <= $#Sorted_Digital_Data) {
        @dataline = split(/\t/, $Sorted_Digital_Data[$i]);
        $order = ($dataline[0] cmp $analog_first[0])
              || ($dataline[1] cmp $analog_first[1]);
        last if $order >= 0;
        $i = $i + 1;
    }

    # Digital data exhausted: nothing further can match.
    last PAIR if $i > $#Sorted_Digital_Data;

    # The digital row belongs to a later (lot, wafer): this analog pair has
    # no digital counterpart, so move on to the next pair.
    next PAIR if $order > 0;

    $OutputMatrix[$k] = [
        $dataline[0],       # Lot ID
        $dataline[1],       # Wafer ID
        $dataline[5],       # Pitch
        $dataline[3],       # Row
        $dataline[4],       # Column
        $dataline[2],       # Site ID
        $analog_first[6],   # Analog Continuity
        defined $analog_second[6] ? $analog_second[6] : '',   # Analog Shorts
        $dataline[6],       # Digital continuity
        $dataline[7],       # Digital shorts (last field of the unchomped row)
    ];
    $i = $i + 1;
    $k = $k + 1;
}
# Print sorted database to file: one tab-separated line per OutputMatrix
# row, columns 0 through 9.
for my $row (@OutputMatrix) {
    my $line = join("\t", map { defined $_ ? $_ : '' } @{$row}[0 .. 9]);

    # The last column normally still carries the newline of the input row
    # it came from.  Strip it if present and always write exactly one, so a
    # source row that lacked a trailing newline (for example the last line
    # of the input file) cannot run two output rows together.
    $line =~ s/\n\z//;
    print OUTPUT $line, "\n";
}

# Buffered write errors (such as a full disk) are only reported when the
# handle is closed, so check each close on the output files.
close(OUTPUT)
    or die "Error closing Spider_Mask_Measurement_Verification.txt: $!\n";
close(OUTPUTA)
    or die "Error closing Analog_Data.txt: $!\n";
close(OUTPUTD)
    or die "Error closing Digital_Data.txt: $!\n";
print "Spider_Mask_Measurement_Validation_20101129: Script finished...\n";