"""
A Python dict implementation.
"""
import collections
MINSIZE = 8
PERTURB_SHIFT = 5
dummy = "<dummy key>"
class Entry(object):
"""
A hash table entry.
Attributes:
* key - The key for this entry.
* hash - The has of the key.
* value - The value associated with the key.
"""
__slots__ = ("key", "value", "hash")
def __init__(self):
self.key = None
self.value = None
self.hash = 0
def __repr__(self):
return "<Entry: key={0} value={1}>".format(self.key, self.value)
class Dict(object):
"""
A mapping interface implemented as a hash table.
Attributes:
* used - The number of entires used in the table.
* filled - used + number of entries with a dummy key.
* table - List of entries; contains the actual dict data.
* mask - Length of table - 1. Used to fetch values.
"""
__slots__ = ("filled", "used", "mask", "table")
def __init__(self, arg=None, **kwargs):
self.clear()
self._update(arg, kwargs)
@classmethod
def fromkeys(cls, keys, value=0):
"""
Return a new dictionary from a sequence of keys.
"""
d = cls()
for key in keys:
d[key] = value
return d
def clear(self):
"""
Clear the dictionary of all data.
"""
self.filled = 0
self.used = 0
self.mask = MINSIZE - 1
self.table = []
# Initialize the table to a clean slate of entries.
for i in range(MINSIZE):
self.table.append(Entry())
def pop(self, *args):
"""
Remove and return the value for a key.
"""
have_default = len(args) == 2
try:
v = self[args[0]]
except KeyError:
if have_default:
return args[1]
raise
else:
del self[args[0]]
return v
def popitem(self):
"""
Remove and return any key-value pair from the dictionary.
"""
if self.used == 0:
raise KeyError("empty dictionary")
entry0 = self.table[0]
entry = entry0
i = 0
if entry0.value is None:
# The first entry in the table's hash is abused to hold the index to
# the next place to look for a value to pop.
i = entry0.hash
if i > self.mask or i < i:
i = 1
entry = self.table[i]
while entry.value is None:
i += 1
if i > self.mask:
i = 1
entry = self.table[i]
res = entry.key, entry.value
self._del(entry)
# Set the next place to start.
entry0.hash = i + 1
return res
def setdefault(self, key, default=0):
"""
If key is in the dictionary, return it. Otherwise, set it to the default
value.
"""
val = self._lookup(key).value
if val is None:
self[key] = default
return default
return val
def _lookup(self, key):
"""
Find the entry for a key.
"""
key_hash = hash(key)
i = key_hash & self.mask
entry = self.table[i]
if entry.key is None or entry is key:
return entry
free = None
if entry.key is dummy:
free = entry
elif entry.hash == key_hash and key == entry.key:
return entry
perturb = key_hash
while True:
i = (i << 2) + i + perturb + 1;
entry = self.table[i & self.mask]
if entry.key is None:
return entry if free is None else free
if entry.key is key or \
(entry.hash == key_hash and key == entry.key):
return entry
elif entry.key is dummy and free is None:
free = dummy
perturb >>= PERTURB_SHIFT
assert False, "not reached"
def _resize(self, minused):
"""
Resize the dictionary to at least minused.
"""
newsize = MINSIZE
# Find the smalled value for newsize.
while newsize <= minused and newsize > 0:
newsize <<= 1
oldtable = self.table
# Create a new table newsize long.
newtable = []
while len(newtable) < newsize:
newtable.append(Entry())
# Replace the old table.
self.table = newtable
self.used = 0
self.filled = 0
# Copy the old data into the new table.
for entry in oldtable:
if entry.value is not None:
self._insert_into_clean(entry)
elif entry.key is dummy:
entry.key = None
self.mask = newsize - 1
def _insert_into_clean(self, entry):
"""
Insert an item in a clean dict. This is a helper for resizing.
"""
i = entry.hash & self.mask
new_entry = self.table[i]
perturb = entry.hash
while new_entry.key is not None:
i = (i << 2) + i + perturb + 1
new_entry = self.table[i & self.mask]
perturb >>= PERTURB_SHIFT
new_entry.key = entry.key
new_entry.value = entry.value
new_entry.hash = entry.hash
self.used += 1
self.filled += 1
def _insert(self, key, value):
"""
Add a new value to the dictionary or replace an old one.
"""
entry = self._lookup(key)
if entry.value is None:
self.used += 1
if entry.key is not dummy:
self.filled += 1
entry.key = key
entry.hash = hash(key)
entry.value = value
def _del(self, entry):
"""
Mark an entry as free with the dummy key.
"""
entry.key = dummy
entry.value = None
self.used -= 1
def __getitem__(self, key):
value = self._lookup(key).value
if value is None:
# Check if we're a subclass.
if type(self) is not Dict:
# Try to call the __missing__ method.
missing = getattr(self, "__missing__")
if missing is not None:
return missing(key)
raise KeyError("no such key: {0!r}".format(key))
return value
def __setitem__(self, key, what):
# None is used as a marker for empty entries, so it can't be in a
# dictionary.
assert what is not None and key is not None, \
"key and value must not be None"
old_used = self.used
self._insert(key, what)
# Maybe resize the dict.
if not (self.used > old_used and
self.filled*3 >= (self.mask + 1)*2):
return
# Large dictionaries (< 5000) are only doubled in size.
factor = 2 if self.used > 5000 else 4
self._resize(factor*self.used)
def __delitem__(self, key):
entry = self._lookup(key)
if entry.value is None:
raise KeyError("no such key: {0!r}".format(key))
self._del(entry)
def __contains__(self, key):
"""
Check if a key is in the dictionary.
"""
return self._lookup(key).value is not None
def __eq__(self, other):
if not isinstance(other, Dict):
try:
# Try to coerce the other to a Dict, so we can compare it.
other = Dict(other)
except TypeError:
return NotImplemented
if self.used != other.used:
# They're not the same size.
return False
# Look through the table and compare every entry, breaking out early if
# we find a difference.
for entry in self.table:
if entry.value is not None:
try:
bval = other[entry.key]
except KeyError:
return False
if not bval == entry.value:
return False
return True
def __ne__(self, other):
return not self == other
def keys(self):
"""
Return a list of keys in the dictionary.
"""
return [entry.key for entry in self.table if entry.value is not None]
def values(self):
"""
Return a list of values in the dictionary.
"""
return [entry.value for entry in self.table if entry.value is not None]
def items(self):
"""
Return a list of key-value pairs.
"""
return [(entry.key, entry.value) for entry in self.table
if entry.value is not None]
def __iter__(self):
return DictKeysIterator(self)
def itervalues(self):
"""
Return an iterator over the values in the dictionary.
"""
return DictValuesIterator(self)
def iterkeys(self):
"""
Return an iterator over the keys in the dictionary.
"""
return DictKeysIterator(self)
def iteritems(self):
"""
Return an iterator over key-value pairs.
"""
return DictItemsIterator(self)
def _merge(self, mapping):
"""
Update the dictionary from a mapping.
"""
for key in mapping.keys():
self[key] = mapping[key]
def _from_sequence(self, seq):
for double in seq:
if len(double) != 2:
raise ValueError("{0!r} doesn't have a length of 2".format(
double))
self[double[0]] = double[1]
def _update(self, arg, kwargs):
if arg:
if isinstance(arg, collections.Mapping):
self._merge(arg)
else:
self._from_sequence(arg)
if kwargs:
self._merge(kwargs)
def update(self, arg=None, **kwargs):
"""
Update the dictionary from a mapping or sequence containing key-value
pairs. Any existing values are overwritten.
"""
self._update(arg, kwargs)
def get(self, key, default=0):
"""
Return the value for key if it exists otherwise the default.
"""
try:
return self[key]
except KeyError:
return default
def __len__(self):
return self.used
def __repr__(self):
r = ["{0!r} : {1!r}".format(k, v) for k, v in self.iteritems()]
return "Dict({" + ", ".join(r) + "})"
collections.Mapping.register(Dict)
class DictIterator(object):
def __init__(self, d):
self.d = d
self.used = self.d.used
self.len = self.d.used
self.pos = 0
def __iter__(self):
return self
def next(self):
# Check if the dictionary has been mutated under us.
if self.used != self.d.used:
# Make this state permanent.
self.used = -1
raise RuntimeError("dictionary size changed during interation")
i = self.pos
while i <= self.d.mask and self.d.table[i].value is None:
i += 1
self.pos = i + 1
if i > self.d.mask:
# We're done.
raise StopIteration
self.len -= 1
return self._extract(self.d.table[i])
__next__ = next
def _extract(self, entry):
return getattr(entry, self.kind)
def __len__(self):
return self.len
class DictKeysIterator(DictIterator):
kind = "key"
class DictValuesIterator(DictIterator):
kind = "value"
class DictItemsIterator(DictIterator):
def _extract(self, entry):
return entry.key, entry.value
Showing posts with label learning. Show all posts
Showing posts with label learning. Show all posts
Sunday, October 19, 2008
Pure Python Dictionary Implementation
For those curious about how CPython's dict implementation works, I've written a Python implementation using the same algorithms. Aside from the education value, it's pretty useless because it doesn't support None as a value and is extremely slow. You can get the source in a Bazaar repo: http://code.python.org/python/users/benjamin.peterson/pydict/
Sunday, July 13, 2008
Learning Django
Quite honestly, I haven't done any web programming since I quit PHP. I do hear a lot of about all those Python web frameworks, so when I saw a link to the Django Book, I took the initiative and started reading. I haven't tried out any other frameworks, but my initial reaction is to love Django. It's obviously well designed around DRY and the Zen and very well documented. Django models are beautiful and makes SQL very painless.
Having worked through the examples in the book, I've started conceiving a larger project. I'd like to make my personal homepage. Obviously, it would have a lot of static content, and I want to do that in the clearest way possible. So, I'll steal a hint from the django docs: write my site in reST and dynamically generate the webpage with docutils. (It will be a good exercise with caching.) In fact, I have a version running locally.
[My first lazyweb query:] Where can I get good Python hosting? I'll need to upload Django, too because I'm running off SVN. Can it be gotten cheap or even for free?
BTW, I'm now aggregated on Planet Python!
Having worked through the examples in the book, I've started conceiving a larger project. I'd like to make my personal homepage. Obviously, it would have a lot of static content, and I want to do that in the clearest way possible. So, I'll steal a hint from the django docs: write my site in reST and dynamically generate the webpage with docutils. (It will be a good exercise with caching.) In fact, I have a version running locally.
[My first lazyweb query:] Where can I get good Python hosting? I'll need to upload Django, too because I'm running off SVN. Can it be gotten cheap or even for free?
BTW, I'm now aggregated on Planet Python!
Subscribe to:
Posts (Atom)