Hash Search

A hash search applies a calculation called a hashing algorithm to the search item to generate its location. (The items in the list or file must have been placed there already using the same algorithm). A hash search is very efficient because it achieves almost direct access to the data. A hashing algorithm should spread the data evenly without many collisions (which result when more than one value hashes to the same address). One solution to a collision when entering the data is to store the item in the next available location. If you reach the end of the list or file, continue searching for a free location at the beginning. It may be slightly more efficient to search backwards for the available location because a comparison with the start location (0) in machine code is more efficient than a comparison with the last (non-zero) location.

In A-level textbooks, the usual introductory hashing algorithm is to take the mod of the integer key field with respect to the number of locations available for the data. (e.g. Location := SearchValue MOD 1000 for 1000 locations). This is fine for random data. Hashing with respect to a prime number improves the spreading of the data if it is not random. Also, instead of using the next available location after a collision, you can jump by an offset generated by a second algorithm. If you have a string to hash, you can combine the ASCII codes for the letters in some way to generate an integer. Because of the similarity of many words and the likelihood of collisions, you may decide to make the algorithm depend on the position of each letter in the word. This is relevant to the design of compilers, where rapid access to data in symbol tables is vital.

Collision resolution by finding available slots works well when there are many available slots (low loading factor). If the amount of memory is limited, you can use a linked list to store colliding items. Each item in the linked list contains a pointer to the next item that hashed to the same location. Put the pointer in the link field of the record and initialise it to nil. You can put all but the first item in the linked list into an overflow area.

Program RandomFileDemo has a low loading factor (high redundancy) initially. Searches continue until an empty record is found, so you must not fill all of the locations. Deletion is achieved by changing the Status field from 'o' for occupied to 'd'. We produced our test file by using the data generator at generatedata.com to produce a spreadsheet which we saved as names.csv and have made available for download here. (Direct generation of the csv file did not give us convenient line breaks).

program RandomFileDemo;
  {$APPTYPE CONSOLE}
uses
  SysUtils, StrUtils;
type
  Names = record
    ID : integer;
    Surname : string[20];
    Status : char;   //'o' for occupied, 'e' for empty, 'd' for deleted
  end;
const
  PATHNAME = 'RandomNames.txt';
var
  SearchID, Choice : integer;
  RandomFile : file of Names;

procedure Create;
var
  CurrentRecord : Names;
  Count : integer;
begin
  CurrentRecord.ID := 0;
  CurrentRecord.Surname := '';
  CurrentRecord.Status := 'e'; //empty
  assignFile(RandomFile, PATHNAME);
  rewrite(RandomFile);
  for Count := 0 to 999 do
    write(RandomFile, CurrentRecord);
end;

procedure Populate;
var
  NamesFile : text;
  CurrentLine : string;
  CountLine, FilePos : integer;
  CurrentRecord, ExaminedRecord : Names;
begin
  assignfile(NamesFile, 'Names.csv');
  reset(NamesFile);
  readln(NamesFile, CurrentLine);
  for CountLine := 1 to 200 do
    begin
      readln(NamesFile, CurrentLine);
      CurrentRecord.ID := strToInt(LeftStr(CurrentLine, 4));
      CurrentRecord.Surname := RightStr(CurrentLine, Length(CurrentLine)-5);
      CurrentRecord.Status := 'o'; //occupied
      FilePos := CurrentRecord.ID MOD 1000;
      repeat
        seek(RandomFile, FilePos);
        read(RandomFile, ExaminedRecord);
        if ExaminedRecord.Status = 'e' then
          begin
            //Back one place to write the record
            seek(RandomFile, FilePos);
            write(RandomFile, CurrentRecord);
          end
        else
          begin
            writeln('Position ', FilePos, ' unavailable');
            inc(FilePos);
            if filePos = 1000 then
              filepos := 0;
          end;
      until ExaminedRecord.Status = 'e';
    end;
  CloseFile(NamesFile);
end;

function Search(StartPos : integer) : integer;
var
  CurrentRecord : Names;
  FilePos, Attempts : integer;
begin
  FilePos := StartPos MOD 1000;
  Attempts := 1;
  repeat
    seek(RandomFile, FilePos);
    read(RandomFile, CurrentRecord);
    if (CurrentRecord.ID <> StartPos) or (CurrentRecord.Status = 'd') then //wrong ID or deleted
      begin
        inc(FilePos);
        inc(Attempts);
        if FilePos = 1000 then
          FilePos := 0;
      end;
  until ((CurrentRecord.ID = StartPos) and (CurrentRecord.Status = 'o')) or (CurrentRecord.Status = 'e');
  if CurrentRecord.ID = StartPos then
    begin
      writeln('Record number ', CurrentRecord.ID, ' with surname ', CurrentRecord.Surname,
             ' found after ', Attempts, ' attempt(s) at file position ', FilePos);
      result := FilePos;
    end
  else
    begin
      writeln('Not found');
      result := -1;
    end;
end;

procedure AddRecord;
var
  NewID : integer;
  NewRecord, CurrentRecord : Names;
  FilePos : integer;
begin
  //Find file position that can be accessed without a collision
  repeat
    NewID := Random(9000) + 1000;
    FilePos := NewID MOD 1000;
    seek(RandomFile, FilePos);
    read(RandomFile, CurrentRecord);
  until CurrentRecord.Status in ['e', 'd']; //Can replace empty or deleted file
  seek(RandomFile, FilePos);
  write('Please enter the new surname. ');
  readln(NewRecord.Surname);
  NewRecord.ID := NewID;
  NewRecord.Status := 'o'; //occupied
  write(RandomFile, NewRecord);
  writeln(NewRecord.surname, ' with ID ', NewID, ' entered at position ', FilePos);
end;

procedure Delete(ID : integer);
var
  FilePos : integer;
  CurrentRecord : Names;
begin
  FilePos := Search(ID);
  if FilePos > 0 then
    begin
      seek(RandomFile, FilePos);
      read(RandomFile, CurrentRecord);
      CurrentRecord.Status := 'd';
      seek(RandomFile, FilePos);
      write(RandomFile, CurrentRecord);
      writeln('Surname ', CurrentRecord.Surname, ' deleted');
    end;
end;

begin
  randomize;
  if not fileExists(PATHNAME) then
    Create;
  assignFile(RandomFile, PATHNAME);
  reset(RandomFile);
  repeat
    writeln(#13#10'Please type the number of your choice.');
    writeln('1 - Create a file of blank records.');
    writeln('    (Use this to start from scratch again.)');
    writeln('2 - Load 200 names into file (requires ''Names.csv'')');
    writeln('3 - Find a record using its ID');
    writeln('4 - Add a record');
    writeln('5 - Delete a record');
    writeln('6 - Quit');
    readln(Choice);
    case choice of
      1 : Create;
      2 : Populate;
      3 : begin
            write('What is the ID of the record to find? ');
            readln(SearchID);
            Search(SearchID);
          end;
      4 : AddRecord;
      5 : begin
            write('What is the ID of the record to delete? ');
            readln(SearchID);
            Delete(SearchID);
          end;
    end;//case
  until Choice = 6;
  closeFile (RandomFile);
end.

procedure Delete(ID : integer);
var
  RandomFile : file of Names;
  FilePos : integer;
  CurrentRecord : Names;
begin
  FilePos := Search(ID);
  if FilePos > 0 then
    begin
      assignFile(RandomFile, 'RandomNames.txt');
      reset(RandomFile);
      seek(RandomFile, FilePos);
      read(RandomFile, CurrentRecord);
      CurrentRecord.Status := 'd';
      seek(RandomFile, FilePos);
      write(RandomFile, CurrentRecord);
      writeln('Surname ', CurrentRecord.Surname, ' deleted');
      closeFile(RandomFile);
    end;
end;

begin
  randomize;
  repeat
    writeln(#13#10'Please type the number of your choice.');
    writeln('1 - Create a file of blank records.');
    writeln('    (This is necessary the first time you use the program');
    writeln('    and can be used later to start from scratch again.)');
    writeln('2 - Load 200 names into file (requires Names.csv)');
    writeln('3 - Find a record');
    writeln('4 - Add a record');
    writeln('5 - Delete a record');
    writeln('6 - Quit');
    readln(Choice);
    case choice of
      1 : Create;
      2 : Populate;
      3 : begin
            write('What is the ID of the record to find? ');
            readln(SearchID);
            Search(SearchID);
          end;
      4 : AddRecord;
      5 : begin
            write('What is the ID of the record to delete? ');
            readln(SearchID);
            Delete(SearchID);
          end;
    end;//case
  until Choice = 6;
end.
Programming - a skill for life!

Linear search, binary search, binary search tree and hash search