At work we were comparing two measurement techniques that used two different instrument setups: one recorded digital voltage threshold values, and the other took an analog resistance measurement to detect gross shorting in a circuit. The software interfaces to these instruments were written in LabVIEW a long, long time ago, and they did not output their data in the same format. Both measurements were done on the same set of devices, and I wanted to match up the results between the two. My original Perl script for this was just a brute-force linear search between the two data files, looking for matching pairs and writing the results to a new table file. It turned out that ~23,000 data points were a bit too much for such lazy programming, and I soon realized that this simple task was taking all day. After a friend showed me how quickly he could match the data sets using C# and LINQ, I realized that this script could do something similar if I used Perl's "sort" function. So here is the script for future reference.
use warnings;
use strict; 
# Open the two input databases and the three output files.
# Every open is checked: a missing input or an unwritable output stops the
# script right here with the OS error, instead of letting it run on with
# unopened handles and produce empty results.
open(DIGITAL, '<', 'spider_mask_defect_database.txt')
  or die "Cannot open spider_mask_defect_database.txt for reading: $!\n";
open(ANALOG, '<', 'spider_mask_resistance_database.txt')
  or die "Cannot open spider_mask_resistance_database.txt for reading: $!\n";
open(OUTPUT, '>', 'Spider_Mask_Measurement_Verification.txt')
  or die "Cannot open Spider_Mask_Measurement_Verification.txt for writing: $!\n";
open(OUTPUTA, '>', 'Analog_Data.txt')
  or die "Cannot open Analog_Data.txt for writing: $!\n";
open(OUTPUTD, '>', 'Digital_Data.txt')
  or die "Cannot open Digital_Data.txt for writing: $!\n";
print "Spider_Mask_Measurement_Validation_20101129: Script started...\n";
# Lot label used in the analog (resistance) database => lot label used in
# the digital (defect) database for the same lot.  Several analog labels may
# map onto one digital lot.
my %LotNames = (
  'FDC25-001_Res'        => 'FDC25-001_Retest2',
  'FDC25-002_Res'        => 'FDC25-002_Retest',
  'FDC25-002_Shorts_Res' => 'FDC25-002_Retest',
  'FDC25-003_Cont_Res'   => 'FDC25-003',
  'FDC25-003_Shorts_Res' => 'FDC25-003',
  'FDC25-004_Res'        => 'FDC25-004',
  'FDC25-005_Res'        => 'FDC25-005',
  'FDC25-006_Res'        => 'FDC25-006',
  'FDC25-007_Res'        => 'FDC25-007',
  'Spider_Mask_Res'      => 'Spider_Mask_PEN_Lot',
);

# Pitch label with its "um" suffix => the bare pitch number
# ("3um" => 3, "6um" => 6, "9um" => 9, "12um" => 12).
my %PitchNames = map { ($_ . 'um', $_) } (3, 6, 9, 12);
# Consume the first line of each input file so that only the lines after it
# remain to be read; the two lines are kept for re-use when the sorted tables
# are written out.
my $DigitalHeader = <DIGITAL>;
my $AnalogHeader = <ANALOG>;

# General-purpose counters shared by the loops further down.
my ($i, $j) = (0, 0);

# Row 0 of the merged table holds the column titles.  The last title carries
# the newline that terminates the row when the table is printed.
my @OutputMatrix = (
  [
    "Lot ID",
    "Wafer ID",
    "Device Pitch",
    "Row",
    "Column",
    "Device Site",
    "Analog Continuity",
    "Analog Shorts",
    "Digital Continuity",
    "Digital Shorts\n",
  ],
);
#
# Note that:
# "Lot ID" = "Lot ID"
# "Wafer ID" = "Wafer ID"
# "Device ID" = "Pitch"
# "Row" =  "Position X"
# "Column" = "Position Y"
# "Site ID" = "Site"
#
#
# In resistance database column labels are: 
# |   1  |    2   |    3    | 4 |   5  |   6   |     7    |        8       |
# |Lot ID|Wafer ID|Device ID|Row|Column|Site ID|Resistance|Measurement Type|
#
# In defect database column labels are:
# |   1  |  2  | 3  |     4    |     5    |  6  |     7    |   8  |
# |Lot ID|Wafer|Site|Position X|Position Y|Pitch|Continuity|Shorts|
#
# Load both databases into memory (the header lines were already consumed).
# Digital rows are kept verbatim.
my @DIGITALDATA = <DIGITAL>;
close(DIGITAL);

my @ANALOGDATA = ();
my @dataline = ();
# Use %LotNames to rename all analog lot labels to their digital counterparts.
while (my $analog_line = <ANALOG>) {
  @dataline = split(/\t/, $analog_line);
  # Remember the original label BEFORE overwriting it, so the error message
  # can actually name the lot that is missing from %LotNames.
  my $analog_lot = defined $dataline[0] ? $dataline[0] : '';
  exists $LotNames{$analog_lot}
    or die "'$analog_lot' not listed in LotNames (analog database line $.)\n";
  $dataline[0] = $LotNames{$analog_lot};
  push(@ANALOGDATA, join("\t", @dataline));
}
close(ANALOG);

# An array is empty when it has no elements at all; a single data row is
# still data and must not be reported as "empty".
if (@ANALOGDATA == 0) {
  print "ANALOGDATA is empty\n";
  exit(-1);
}
if (@DIGITALDATA == 0) {
  print "DIGITALDATA is empty\n";
  exit(-1);
}
print "Done loading database into memory. Starting data sorting and merging...\n"; 
# Sort both tables into the same device order so they can be merged by
# walking them side by side.  Each line is split into fields exactly once
# (decorate / sort / undecorate) instead of once per key per comparison.
my @Sorted_Digital_Data = ();
my @Sorted_Analog_Data = ();
my @Temp_Analog_Data = ();

# Digital rows, decorated as [ original line, [ fields ] ].
# Field indices (0-based): 0 Lot ID, 1 Wafer, 2 Site, 3 Position X,
# 4 Position Y, 5 Pitch.
@Sorted_Digital_Data =
  map  { $_->[0] }
  sort {
       $a->[1][0] cmp $b->[1][0]    # Lot ID
    || $a->[1][1] cmp $b->[1][1]    # Wafer
    || $a->[1][2] cmp $b->[1][2]    # Site
    || $a->[1][5] <=> $b->[1][5]    # Pitch
    || $a->[1][3] <=> $b->[1][3]    # Position X
    || $a->[1][4] <=> $b->[1][4]    # Position Y
  }
  map  { [ $_, [ split /\t/, $_ ] ] } @DIGITALDATA;

# Analog rows, decorated as [ numeric pitch, original line, [ fields ] ].
# The pitch is the number in front of "um" in the Device ID field (index 2).
# A Device ID without that pattern is a hard error: sorting on a stale or
# undefined capture would silently misalign the later merge.
@Temp_Analog_Data = map {
  my @fields = split /\t/, $_;
  my $device_id = defined $fields[2] ? $fields[2] : '';
  $device_id =~ /([0-9]+)um/
    or die "Cannot extract pitch from Device ID '$device_id' in analog row: $_";
  [ $1, $_, \@fields ]
} @ANALOGDATA;

# Field indices (0-based): 0 Lot ID, 1 Wafer ID, 3 Row, 4 Column, 5 Site ID,
# 7 Measurement Type.  Sorting on Measurement Type last keeps the rows of one
# device adjacent and in a fixed order.
@Temp_Analog_Data = sort {
     $a->[2][0] cmp $b->[2][0]      # Lot ID
  || $a->[2][1] cmp $b->[2][1]      # Wafer ID
  || $a->[2][5] cmp $b->[2][5]      # Site ID
  || $a->[0]    <=> $b->[0]         # Pitch (numeric part of Device ID)
  || $a->[2][3] <=> $b->[2][3]      # Row
  || $a->[2][4] <=> $b->[2][4]      # Column
  || $a->[2][7] cmp $b->[2][7]      # Measurement Type
} @Temp_Analog_Data;

@Sorted_Analog_Data = map { $_->[1] } @Temp_Analog_Data;
# Write each sorted table, preceded by the header line read from its source
# file, to its own file so the sort order can be inspected.
print OUTPUTD $DigitalHeader, @Sorted_Digital_Data;
print OUTPUTA $AnalogHeader, @Sorted_Analog_Data;
#
# Combine sorted databases into OutputMatrix then dump to file.
# OutputMatrix column labels are:
# |    0   |     1    |      2       |  3  |   4    |      5      |         6         |       7       |         8          |       9        |
# | Lot ID | Wafer ID | Device Pitch | Row | Column | Device Site | Analog Continuity | Analog Shorts | Digital Continuity | Digital Shorts |
#
# Merge the two sorted tables into @OutputMatrix.
#
# The analog table is consumed two rows at a time: the first row of a pair
# supplies the "Analog Continuity" value and the second the "Analog Shorts"
# value (presumably one continuity and one shorts measurement per device --
# confirm against the Measurement Type column).  A pair is joined to the
# current digital row when Lot ID and Wafer ID agree.
#
# Both tables are sorted by Lot ID then Wafer ID, so when the keys disagree
# the side with the smaller key can never match later and is skipped:
#   digital < analog : no analog data for this digital row, skip the row
#   digital > analog : no digital data for this analog pair, skip the pair
# The loop stops as soon as either table is exhausted or fewer than two
# analog rows remain, so no index runs past the end of an array.
#
# NOTE(review): only Lot ID and Wafer ID are compared; site, row, column and
# pitch are assumed to line up because both tables share the same sort order.
my $k = 1;   # next free row of @OutputMatrix (row 0 holds the column titles)
$i = 0;      # index of the current digital row
$j = 0;      # index of the first row of the current analog pair
while ($i <= $#Sorted_Digital_Data && $j < $#Sorted_Analog_Data) {
  # Strip the line ending before splitting and add it back explicitly below,
  # so a digital line without a trailing newline cannot fuse two output rows.
  # The -1 limit keeps an empty last column from being dropped.
  my $digital_line = $Sorted_Digital_Data[$i];
  chomp($digital_line);
  @dataline = split(/\t/, $digital_line, -1);

  my @analog_first  = split(/\t/, $Sorted_Analog_Data[$j]);
  my @analog_second = split(/\t/, $Sorted_Analog_Data[$j + 1]);

  my $order = ($dataline[0] cmp $analog_first[0])
           || ($dataline[1] cmp $analog_first[1]);
  if ($order < 0) {
    $i = $i + 1;
    next;
  }
  if ($order > 0) {
    $j = $j + 2;
    next;
  }

  $OutputMatrix[$k][0] = $dataline[0];      # Lot ID
  $OutputMatrix[$k][1] = $dataline[1];      # Wafer ID
  $OutputMatrix[$k][2] = $dataline[5];      # Pitch
  $OutputMatrix[$k][3] = $dataline[3];      # Row
  $OutputMatrix[$k][4] = $dataline[4];      # Column
  $OutputMatrix[$k][5] = $dataline[2];      # Site ID
  $OutputMatrix[$k][6] = $analog_first[6];  # Analog Continuity
  $OutputMatrix[$k][7] = $analog_second[6]; # Analog Shorts
  $OutputMatrix[$k][8] = $dataline[6];      # Digital continuity
  # Digital shorts; the newline terminates the output row.
  $OutputMatrix[$k][9] = (defined $dataline[7] ? $dataline[7] : '') . "\n";

  $i = $i + 1;
  $j = $j + 2;
  $k = $k + 1;
}
# Print the merged table to file: ten tab-separated cells per row.  The last
# cell of every row already ends with the newline that terminates the row.
for my $row (@OutputMatrix) {
  print OUTPUT join("\t", @{$row}[0 .. 9]);
}

# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so every close is checked before success is reported.
close(OUTPUT)
  or die "Error closing Spider_Mask_Measurement_Verification.txt: $!\n";
close(OUTPUTA)
  or die "Error closing Analog_Data.txt: $!\n";
close(OUTPUTD)
  or die "Error closing Digital_Data.txt: $!\n";
print "Spider_Mask_Measurement_Validation_20101129: Script finished...\n";
No comments:
Post a Comment