How to build a low-level database using java RandomAccessFile part 4

package hamner.db;
import java.io.*;
import java.util.*;
public class RecordsFile extends BaseRecordsFile {
  /**
   * Hashtable which holds the in-memory index. For efficiency, the entire index 
   * is cached in memory. The hashtable maps a key of type String to a RecordHeader.
   */
  protected Hashtable memIndex;    
  /**
   * Creates a new database file.  The initialSize parameter determines the 
   * amount of space which is allocated for the index.  The index can grow 
   * dynamically, but the parameter is provide to increase 
   * efficiency. 
   */
  public RecordsFile(String dbPath, int initialSize) throws IOException, RecordsFileException {
    super(dbPath, initialSize);
    memIndex = new Hashtable(initialSize);
  }
  /**
   * Opens an existing database and initializes the in-memory index. 
   */
  public RecordsFile(String dbPath, String accessFlags) throws IOException, RecordsFileException {
    super(dbPath, accessFlags);
    int numRecords = readNumRecordsHeader();
    memIndex = new Hashtable(numRecords);
    for (int i = 0; i < numRecords; i++) {
      String key = readKeyFromIndex(i);
      RecordHeader header = readRecordHeaderFromIndex(i);
      header.setIndexPosition(i);
      memIndex.put(key, header);
    }
  }
  /**
   * Returns an enumeration of all the keys in the database.
   */
  public synchronized Enumeration enumerateKeys() {
    return memIndex.keys();
  }
  /**
   * Returns the current number of records in the database. 
   */
  public synchronized int getNumRecords() {
    return memIndex.size();
  }
  /**
   * Checks if there is a record belonging to the given key. 
   */
  public synchronized boolean recordExists(String key) {
    return memIndex.containsKey(key);
  }
  /**
   * Maps a key to a record header by looking it up in the in-memory index.
   */
  protected RecordHeader keyToRecordHeader(String key) throws RecordsFileException {
    RecordHeader h = (RecordHeader)memIndex.get(key);
    if (h==null) {
      throw new RecordsFileException("Key not found: " + key);
    } 
    return h;
  }
  /**
   * This method searches the file for free space and then returns a RecordHeader 
   * which uses the space. (O(n) memory accesses)
   */
  protected RecordHeader allocateRecord(String key, int dataLength) throws RecordsFileException, IOException {
    // search for empty space
    RecordHeader newRecord = null;
    Enumeration e = memIndex.elements();
    while (e.hasMoreElements()) {
      RecordHeader next = (RecordHeader)e.nextElement();
      int free = next.getFreeSpace();
      if (dataLength <= next.getFreeSpace()) {
    newRecord = next.split();
    writeRecordHeaderToIndex(next);
    break;
      }
    }
    if (newRecord == null) {
      // append record to end of file - grows file to allocate space
      long fp = getFileLength();
      setFileLength(fp + dataLength);
      newRecord = new RecordHeader(fp, dataLength);
    } 
    return newRecord;
  }
  /**
   * Returns the record to which the target file pointer belongs - meaning the specified location
   * in the file is part of the record data of the RecordHeader which is returned.  Returns null if 
   * the location is not part of a record. (O(n) mem accesses)
   */
  protected RecordHeader getRecordAt(long targetFp) throws RecordsFileException {
    Enumeration e = memIndex.elements();
    while (e.hasMoreElements()) {
      RecordHeader next = (RecordHeader) e.nextElement();
      if (targetFp >= next.dataPointer &&
      targetFp < next.dataPointer + (long)next.dataCapacity) {
    return next;
      }
    }
    return null;
  }
  /**
   * Closes the database. 
   */
  public synchronized void close() throws IOException, RecordsFileException {
    try {
      super.close();
    } finally {
      memIndex.clear();
      memIndex = null;
    }
  }
  /**
   * Adds the new record to the in-memory index and calls the super class add
   * the index entry to the file. 
   */
  protected void addEntryToIndex(String key, RecordHeader newRecord, int currentNumRecords) throws IOException, RecordsFileException {
    super.addEntryToIndex(key, newRecord, currentNumRecords);
    memIndex.put(key, newRecord);   
  }
  /**
   * Removes the record from the index. Replaces the target with the entry at the 
   * end of the index. 
   */
  protected void deleteEntryFromIndex(String key, RecordHeader header, int currentNumRecords) throws IOException, RecordsFileException {
    super.deleteEntryFromIndex(key, header, currentNumRecords);
    RecordHeader deleted = (RecordHeader)memIndex.remove(key);
  }
}

ble memIndex is used to cache the entire file index in memory. The hashtable maps keys to record headers.

public RecordsFile(String dbPath, String accessFlags) throws IOException, RecordsFileException {
   super(dbPath, accessFlags);
   int numRecords = readNumRecordsHeader();
   memIndex = new Hashtable(numRecords);
   for (int i = 0; i < numRecords; i++) {
      String key = readKeyFromIndex(i);
      RecordHeader header = readRecordHeaderFromIndex(i);
      header.setIndexPosition(i);
      memIndex.put(key, header);
   }
}

This constructor initializes the file by calling the super's constructor and then prepares the in-memory index by iterating over the index entries while using the readKeyFromIndex() and readRecordHeaderFromIndex() methods to add entries to the hashtable.

protected RecordHeader keyToRecordHeader(String key) throws RecordsFileException {
   RecordHeader h = (RecordHeader)memIndex.get(key);
   if (h==null) {
      throw new RecordsFileException("Key not found: " + key);
   } 
   return h;
}

This method maps a record's key to its RecordHeader. Note that because no file accesses are required, and the lookup is done through the hashtable, this operation takes place in constant order time; that is, the time it takes to look up one record is not a function of the number of records in the file.

protected RecordHeader allocateRecord(String key, int dataLength) throws RecordsFileException, IOException {
   // search for empty space
   RecordHeader newRecord = null;    
   Enumeration e = memIndex.elements();
   while (e.hasMoreElements()) {
      RecordHeader next = (RecordHeader)e.nextElement();
      int free = next.getFreeSpace();
      if (dataLength <= next.getFreeSpace()) {
         newRecord = next.split();
         writeRecordHeaderToIndex(next);
         break;
      }
   }
   if (newRecord == null) {
      // append record to end of file - grow file to allocate space
      long fp = getFileLength();
      setFileLength(fp + dataLength);
      newRecord = new RecordHeader(fp, dataLength);
   }
   return newRecord;
}

This method locates free space for a new record of size dataLength by iterating over all the entries in memIndex until adequate space is found. If space is not found in an existing record, the new record is appended to the end of the file. If the record does contain space, it is split into two parts: one for the existing record and one for the new record.

I need to point out here that this is not the optimal solution for this operation because it makes inserting an element an order n operation with respect to memory accesses. A more advanced implementation of the RecordsFile class would keep a sorted memory map so that finding free space would give log(n) performance. However, this implementation still holds true to our design requirement, since the number of file accesses required is not dependent on the number of records in the file.

protected RecordHeader getRecordAt(long targetFp) throws RecordsFileException {
   Enumeration e = memIndex.elements();
   while (e.hasMoreElements()) {
      RecordHeader next = (RecordHeader) e.nextElement();
      if (targetFp >= next.dataPointer && targetFp < next.dataPointer + (long)next.dataCapacity) {
         return next;
      }
   }
   return null;
}

This method locates the record that has data residing at the indicated position in the file. As with the last method, this implementation is O(n) with respect to memory accesses and leaves room for improvement.

Class RecordHeader

RecordHeader provides a wrapper to hold key information about a record.

package hamner.db;
import java.io.*;
public class RecordHeader {
  /**
   * File pointer to the first byte of record data (8 bytes).
   */
  protected long dataPointer; 
  /**
   * Actual number of bytes of data held in this record (4 bytes).
   */
  protected int dataCount;
  /**
   * Number of bytes of data that this record can hold (4 bytes).
   */
  protected int dataCapacity;  
  /**
   * Indicates this header's position in the file index.
   */
  protected int indexPosition;
  protected RecordHeader() {
  }
  protected RecordHeader(long dataPointer, int dataCapacity) {
    if (dataCapacity < 1) {
      throw new IllegalArgumentException("Bad record size: " + dataCapacity);
    }
    this.dataPointer = dataPointer;
    this.dataCapacity = dataCapacity;
    this.dataCount = 0;
  }
  protected int getIndexPosition() {
    return indexPosition;
  }
  protected void setIndexPosition(int indexPosition) {
    this.indexPosition = indexPosition;
  }
  protected int getDataCapacity() {
    return dataCapacity;
  }
  protected int getFreeSpace() {
    return dataCapacity - dataCount;
  }
  protected void read(DataInput in) throws IOException {
    dataPointer = in.readLong();
    dataCapacity = in.readInt();
    dataCount = in.readInt();
  }
  protected void write(DataOutput out) throws IOException {
    out.writeLong(dataPointer);
    out.writeInt(dataCapacity);
    out.writeInt(dataCount);
  }
  protected static RecordHeader readHeader(DataInput in) throws IOException {
    RecordHeader r = new RecordHeader();
    r.read(in);
    return r;
  }
  /**
   * Returns a new record header which occupies the free space of this record.
   * Shrinks this record size by the size of its free space.
   */
  protected RecordHeader split() throws RecordsFileException {
    long newFp = dataPointer + (long)dataCount;
    RecordHeader newRecord = new RecordHeader(newFp, getFreeSpace());
    dataCapacity = dataCount;
    return newRecord;
  }
}

Figure 3. RecordHeader

Here are the highlights of the source:

protected long dataPointer;

The dataPointer is a file pointer to the first byte of this record's data.

protected int dataCount;

The dataCount is used to keep track of the actual number of bytes of data held in this record. This is the number of valid bytes written and should not to be confused with the dataCapacity.

protected int dataCapacity;  

The dataCapacity is the number of bytes of data this record can hold. This value will always be equal to or larger than the dataCount.

protected int indexPosition;

The index position is used to speed accesses to this header's entry in the file. Unlike the other fields in this class, this value is stored only in-memory and is not written to the file as part of the record header.

protected void write(DataOutput out) throws IOException {
   out.writeLong(dataPointer);
   out.writeInt(dataCapacity);
   out.writeInt(dataCount);
}

The write() method writes the header to the file.

protected void read(DataInput in) throws IOException {
   dataPointer = in.readLong();
   dataCapacity = in.readInt();
   dataCount = in.readInt();
}

This method reads and initializes the header fields from the file. Note that the fields are read in the same order in which they were written.

protected RecordHeader split() throws RecordsFileException {
   long newFp = dataPointer + (long)dataCount;
   RecordHeader newRecord = new RecordHeader(newFp, getFreeSpace());
   dataCapacity = dataCount;
   return newRecord;
}

The split() method returns a new record header, which occupies the free space of this record. It also shrinks this record's size by the size of its free space.

Class RecordWriter

RecordWriter is used to construct a new record and write it to the file as a byte buffer. To accomplish this, the user writes data into a DbByteArrayOutputStream, which is in turn written to the file. This design protects the file from being corrupted by the user, since the user is never allowed to write directly to the file. This speeds things up, since the user can build a record (often times consuming due to object serialization) without blocking other users from reading and writing records.

package hamner.db;
import java.io.*;
public class RecordWriter {
  String key;
  DbByteArrayOutputStream out;
  ObjectOutputStream objOut;
  public RecordWriter(String key) {
    this.key = key;
    out = new DbByteArrayOutputStream();
  }
  public String getKey() {
    return key;
  }
  public OutputStream getOutputStream() {
    return out;
  }
  public ObjectOutputStream getObjectOutputStream() throws IOException {
    if (objOut == null) {
      objOut = new ObjectOutputStream(out);
    }
    return objOut;
  }
  public void writeObject(Object o) throws IOException {
    getObjectOutputStream().writeObject(o);
    getObjectOutputStream().flush();
  }
  /**
   * Returns the number of bytes in the data.
   */
  public int getDataLength() {
    return out.size();
  }
  /**
   *  Writes the data out to the stream without re-allocating the buffer.
   */
  public void writeTo(DataOutput str) throws IOException {
    out.writeTo(str);
  }
}

Figure 4. RecordWriter

Here are the code highlights for this class:

String key;
DbByteArrayOutputStream out;
ObjectOutputStream objOut;
public RecordWriter(String key) {
   this.key = key;
   out = new DbByteArrayOutputStream();
}

The constructor sets the key field and then creates a DbByteArrayOutputStream, which is used to buffer the data written to the record.

public ObjectOutputStream getObjectOutputStream() throws IOException {
   if (objOut == null) {
      objOut = new ObjectOutputStream(out);
   }
   return objOut;
}

This method returns an ObjectOutputStream that can be used to store serialized objects in the record.

public void writeObject(Object o) throws IOException {
   getObjectOutputStream().writeObject(o);
   getObjectOutputStream().flush();
}

This utility method serializes an object to the record.

public int getDataLength() {
   return out.size();
}

This method returns the number of bytes of data in this record. Note that this method will only return the correct value if all buffered streams constructed off of the output stream have been flushed.

public void writeTo(DataOutput str) throws IOException {
   out.writeTo(str);
}

This method writes the record data to the DataOutput stream. It uses the writeTo() method of class DbByteArrayOutputStream.

Class RecordReader

A RecordReader is used to read data from the record. It is a utility class that makes record data more accessible than it is in its raw byte buffer form.

package hamner.db;
import java.io.*;
public class RecordReader {
  String key;
  byte[] data;
  ByteArrayInputStream in;
  ObjectInputStream objIn;
  public RecordReader(String key, byte[] data) {
    this.key = key;
    this.data = data;
    in = new ByteArrayInputStream(data);
  }
  public String getKey() {
    return key;
  }
  public byte[] getData() {
    return data;
  }
  public InputStream getInputStream() throws IOException {
    return in;
  }
  public ObjectInputStream getObjectInputStream() throws IOException {
    if (objIn == null) {
      objIn = new ObjectInputStream(in);
    }
    return objIn;
  }
  /**
   * Reads the next object in the record using an ObjectInputStream.
   */
  public Object readObject() throws IOException, OptionalDataException, ClassNotFoundException {
    return getObjectInputStream().readObject();
  }
}

Figure 5. RecordReader

Here's a look at the code highlights:

String key;
byte[] data;
ByteArrayInputStream in;
ObjectInputStream objIn;

The RecordReader has four instance variables:

  • The record's key

  • A byte buffer that holds the record data

  • A ByteArrayInputStream for reading the record data

  • An ObjectInputStream used for reading serialized objects from the record

public RecordReader(String key, byte[] data) {
   this.key = key;
   this.data = data;
   in = new ByteArrayInputStream(data);
}

The constructor initializes the key and data fields and then creates the ByteArrayInputStream off of the data buffer.

public InputStream getInputStream() throws IOException {
   return in;
}

This method returns an input stream for reading fields from the record.

public ObjectInputStream getObjectInputStream() throws IOException {
   if (objIn == null) {
      objIn = new ObjectInputStream(in);
   }
   return objIn;
}

This method returns an ObjectOutputStream for reading serialized objects. The user should use either this method or the getInputStream() method for reading data -- mixed access will produce undefined results.

public Object readObject() throws IOException, OptionalDataException, ClassNotFoundException {
   return getObjectInputStream().readObject();
}

This is a utility method to read the next serialized object from the objIn stream.

Class DbByteArrayOutputStream

DbByteArrayOutputStream is a small utility class that extends ByteArrayOutputStream in order to provide a way to write the buffer to a DataOutput without reallocating it.

package hamner.db;
import java.io.*;
/**
 * Extends ByteArrayOutputStream to provide a way of writing the buffer to
 * a DataOutput without re-allocating it.
 */
public class DbByteArrayOutputStream extends ByteArrayOutputStream {
  public DbByteArrayOutputStream() {
    super();
  }
  public DbByteArrayOutputStream(int size) {
    super(size);
  }
  /**
   * Writes the full contents of the buffer a DataOutput stream.
   */
  public synchronized void writeTo (DataOutput dstr) throws IOException {
    byte[] data = super.buf;
    int l = super.size();
    dstr.write(data, 0, l);
  }
}

Figure 6. DbByteArrayOutputStream

Class RecordsFileException

loading...

Related posts

Related posts