As mentioned during one of Professor Pister’s group meetings, you can treat language as an MDP. Given a sample text, you compute, for each word, the probabilities of the words that follow it, and then sample from those distributions to get mildly coherent text.

Feeding it Toobin’s “Money Unlimited” and asking for 8 words gives you the following: “some have asserted that viewers should have known”.

# (c) 2010 Leo Keselman New BSD
#!/usr/bin/python

import sys
import random

class Counter(dict):
  """Dictionary of tallies in which a key that was never set reads as zero.

  Looking up a missing key stores ``0`` under it as a side effect, which is
  what lets ``counter[key] += 1`` work without initialising the key first.
  """

  def __getitem__(self, idx):
    """Return the value stored for *idx*, inserting ``0`` first if absent."""
    try:
      return dict.__getitem__(self, idx)
    except KeyError:
      # First time this key is read: remember it with a zero tally.
      dict.__setitem__(self, idx, 0)
      return dict.__getitem__(self, idx)

  def normalize(self):
    """Divide every value, in place, by the sum of all values.

    Leaves the contents untouched when the values sum to zero.
    """
    denominator = float(sum(self.values()))
    if denominator == 0:
      return
    # Snapshot the pairs so the reassignment below never races the iterator.
    for key, tally in list(self.items()):
      self[key] = tally / denominator

# Text to learn from; its path is the first command-line argument.
input = open(sys.argv[1])

# Transition table, plus the word currently being assembled and the token
# that preceded it.
bank = {}
word = prevword = ""

# Characters that end a word.
delim = [
    '/', '-', ' ', '.', ',', '?', '!', ';', ':', '"', ')', '(', '&',
]
# Every delimiter except the space and the comma.
punct = [c for c in delim if c != ' ' and c != ',']

def update_bank(db, key, val):
    """Record one observation of token *val* occurring right after *key*.

    Both tokens are lower-cased before use, so counting is case-insensitive.

    db  -- dict mapping a token to a mapping of successor tallies; it is
           modified in place, and a new Counter is created for a token the
           first time it is seen as *key*.
    key -- the preceding token (a string).
    val -- the token that followed it (a string).
    """
    key = key.lower()
    val = val.lower()
    # Membership test via `in`: dict.has_key() was removed in Python 3 and
    # `in` behaves the same on Python 2.
    if key not in db:
        db[key] = Counter()
    db[key][val] += 1
def sample(dist):
    """Draw one element at random from a discrete distribution.

    dist -- iterable of (element, probability) pairs; the probabilities are
            expected to sum to 1.

    Returns the chosen element, or None when *dist* is empty.
    """
    r = random.random()
    base = 0.0
    element = None
    for element, prob in dist:
        base += prob
        # Pick the first element whose cumulative probability exceeds the
        # draw.  (Testing `r` alone is true for almost every draw and would
        # always hand back the first element.)
        if r < base:
            return element
    # Floating-point rounding can leave the running total a hair below 1.0;
    # in that case fall back to the last element instead of returning None.
    return element

# --- 1. Learn the transition table from the input text -------------------
# `input`, `bank`, `word`, `prevword`, `delim` and `punct` are the
# module-level values set up earlier in this script.
try:
    text = input.read()
finally:
    # The handle is not needed again, so release it right away.
    input.close()

for ch in text:
    # A line break separates words just like a space does.  Dropping it
    # outright would fuse the last word of one line with the first word of
    # the next.
    if ch == '\n':
        ch = ' '
    if ch not in delim:
        word += ch
        continue
    # Runs of delimiters (", " or ". ") reach this point with `word` empty;
    # skip it so the empty string never becomes a token in the chain.
    if word:
        update_bank(bank, prevword, word)
        prevword = word
        word = ""
    if ch in punct:
        # Punctuation other than the comma is itself a token in the chain.
        update_bank(bank, prevword, ch)
        prevword = ch

# Text that does not end in a delimiter still has a final word pending.
if word:
    update_bank(bank, prevword, word)

# --- 2. Turn successor tallies into probabilities ------------------------
for successors in bank.values():
    successors.normalize()

# --- 3. Generate text -----------------------------------------------------
# Start as if a sentence had just ended.
word = '.'

with open('gen_txt', 'w') as output:
    # The number of tokens to emit is the second command-line argument.
    for _ in range(int(sys.argv[2])):
        # A token seen only at the very end of the input has no recorded
        # successor; restart from the sentence boundary in that case, and
        # stop if the input never contained a '.' either.
        successors = bank.get(word) or bank.get('.')
        if not successors:
            break
        next_w = sample(successors.items())
        if next_w == '.':
            output.write(".\n")
        else:
            output.write(next_w)
            output.write(' ')
        word = next_w