Hashing-to-Disk by Hand
#!/usr/bin/perl -w
######################### Hashing to Disk #################################
use constant RECS_PER_BUCKET => scalar 4;
use constant RECORD_SIZE => scalar 25;
use constant BUCKETS => scalar 100;
if (-e "hashfile") {open(H,"+<hashfile") || die "Cannot open hashfile.\n"}
if (!-e "hashfile") {open(H,"+>hashfile") || die "Cannot open hashfile.\n"}
# Create an empty hashfile where each record is space-filled, \n-terminated
# if hashfile does not yet have any size (which means it is new).
if (-z H)
{
print "empty!\n";
$record = " " x (RECORD_SIZE - 1) . "\n";
print H $record x (RECS_PER_BUCKET * BUCKETS);
}
# Account number format is XX-XXXXX-XXX
while(print("Enter account balance: "), ($line = <STDIN>) !~ /^\s*quit\s*$/i)
{
next if $line =~ /^\s*$/; # Re-prompt if no input.
# Trim leading and trailing spaces from input line.
$line =~ s/^\s*|\s*$//g;
# Split at whitespace to get account number and balance off of input line.
($acct, $balance, $junk) = split /\s+/, $line;
# Is there an excess field on the input line?
if (defined $junk)
{
print "Bad line format! Must be two fields!\n";
next;
}
# See if account number and balance are in the correct format.
$error = error_check($acct, $balance);
next if $error;
# Compute hash value by unique account number key.
$hash = hash_calc($acct);
# Format record and go to proper hash bucket.
$record = sprintf("%12s%12.2f\n", $acct, $balance);
seek(H, $hash * RECORD_SIZE * RECS_PER_BUCKET, 0);
# Make sure the record isn't already in the bucket.
$duplicate = check_for_duplicate($acct);
next if $duplicate;
# Store the record. Report error if no room in bucket.
store_record($record);
} # Finally, the end of the while loop!!
# Input loop! Do a search for records. This will also tell us if our
# hash algorithm is correct.
while(print("Enter account: "), ($acct = <STDIN>) !~ /^\s*quit\s*$/i)
{
next if $acct =~ /^\s*$/; # Re-prompt if no input.
# Remove leading/trailing spaces from account number.
$acct =~ s/^\s*|\s*$//g;
# Check for bad account number format.
if ($acct !~ /^\d{2}-\d{5}-\d{3}$/)
{
print "Bad account number format!\n";
next;
}
# Compute hash value by unique account number key.
$hash = hash_calc($acct);
# Go to proper hash bucket.
seek(H, $hash * RECORD_SIZE * RECS_PER_BUCKET, 0);
# Search for record.
find_record($acct);
} # End of input while loop.
# Report errors and reprompt if account number or balance in wrong format.
sub error_check
{
my ($acct, $balance) = @_;
if ($acct !~ /^\d{2}-\d{5}-\d{3}$/)
{
print "Bad account format!\n";
return 1;
}
if ($balance !~ /^\d+\.\d{2}$/)
{
print "Bad balance format!\n";
return 1;
}
return 0;
}
# Calculate the hash value of the user's record by summing the squares
# of the ASCII values of the characters of the account number and then
# modulo by the number of buckets.
sub hash_calc
{
my ($acct) = @_;
my (@sqrs, $hash, $square);
@sqrs = map {ord() * ord()} split //, $acct;
$hash = 0;
foreach $square (@sqrs) {$hash += $square}
$hash %= BUCKETS; # Figure out bucket location.
return $hash;
}
# Don't allow duplicate entries to become part of the database.
sub check_for_duplicate
{
my ($acct) = @_;
my $searchRec;
for (1..RECS_PER_BUCKET)
{
$searchRec = <H>;
if ($acct eq substr($searchRec, 0, 12))
{
print "Record already exists!\n";
return 1;
}
}
seek(H, -(RECS_PER_BUCKET * RECORD_SIZE), 1); # Back to start of bucket
return 0;
}
# Find record.
sub find_record
{
my ($acct) = @_;
my $searchRec;
for (1..RECS_PER_BUCKET)
{
$searchRec = <H>;
if ($acct eq substr($searchRec, 0, 12))
{
print substr($searchRec,0,12)," ", substr($searchRec,13,13);
return;
}
}
print "Record not found!\n";
}
# Store record if there's room in the bucket. Assumes file pointer is
# positioned at the beginning of the bucket.
sub store_record
{
my ($record) = @_;
for (1..RECS_PER_BUCKET)
{
$searchRec = <H>;
if (substr($searchRec, 0, 1) eq " ") # Back up and store!
{
seek(H, -(RECORD_SIZE), 1);
print H $record;
return 1;
}
}
print "Hash bucket overflow! Could not store record!\n";
return 0;
}
############################### Program Session ############################
$ hash.pl
Enter account balance: 11-11111-111 44.55
Enter account balance: 22-22222-222 9876.09
Enter account balance: 33-33333-333 54.87
Enter account balance: 98-09876-555 23456.00
Enter account balance: 11-11111-111 55.55
Record already exists!
Enter account balance: 11-1234-111 99.88
Bad account format!
Enter account balance: 11-6543e-987 88.77
Bad account format!
Enter account balance: 111-11-111
Bad account format!
Enter account balance: 12-12345-678
Bad balance format!
Enter account balance: 333-33333-333 87.987
Bad account format!
Enter account balance: quit
Enter account: 44-44444-444
Record not found!
Enter account: 11-11111-111
11-11111-111 44.55
Enter account: quit
$ hash.pl
Enter account balance: 33-33333-333
Bad balance format!
Enter account balance: quit
Enter account: 33-33333-333
Record not found!
Enter account: quit