diff --git a/blocks/__init__.py b/blocks/__init__.py
index 37850e3c..90230562 100644
--- a/blocks/__init__.py
+++ b/blocks/__init__.py
@@ -2,4 +2,4 @@
 # Scary warning: Adding code to this file can break namespace packages
 # See https://pythonhosted.org/setuptools/setuptools.html#namespace-packages
 __import__("pkg_resources").declare_namespace(__name__)
-__version__ = '0.0.1'
+__version__ = '0.1.1'
diff --git a/blocks/bricks/recurrent.py b/blocks/bricks/recurrent.py
index 498912a2..d49df50f 100644
--- a/blocks/bricks/recurrent.py
+++ b/blocks/bricks/recurrent.py
@@ -648,6 +648,11 @@ def apply(self, *args, **kwargs):
     def apply_delegate(self):
         return self.children[0].apply
 
+    def get_dim(self, name):
+        if name in self.apply.outputs:
+            return self.prototype.get_dim(name) * 2
+        return self.prototype.get_dim(name)
+
 RECURRENTSTACK_SEPARATOR = '#'
diff --git a/blocks/bricks/sequence_generators.py b/blocks/bricks/sequence_generators.py
index b08607e5..d43807d6 100644
--- a/blocks/bricks/sequence_generators.py
+++ b/blocks/bricks/sequence_generators.py
@@ -582,6 +582,19 @@ class AbstractEmitter(Brick):
     :class:`SoftmaxEmitter` : for integer outputs
 
+    Notes
+    -----
+    An important detail about the emitter cost is that it will be
+    evaluated with inputs of different dimensions, so it has to be
+    flexible enough to handle this. The two ways in which it can be
+    applied are:
+
+    1. In :meth:`BaseSequenceGenerator.cost_matrix`, where it will
+       be applied to the whole sequence at once.
+
+    2. In :meth:`BaseSequenceGenerator.generate`, where it will be
+       applied to only one step of the sequence.
+
     """
     @abstractmethod
     def emit(self, readouts):
diff --git a/blocks/main_loop.py b/blocks/main_loop.py
index 54620d3e..b519a254 100644
--- a/blocks/main_loop.py
+++ b/blocks/main_loop.py
@@ -27,7 +27,7 @@
 epoch_interrupt_message = """
-Blocks will complete this epoch iteration of training and run extensions \
+Blocks will complete this epoch of training and run extensions \
 before exiting. If you do not want to complete this epoch, press CTRL + C \
 again to stop training after the current batch."""
diff --git a/docs/conf.py b/docs/conf.py
index 57942d1f..a726e04c 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -94,9 +94,9 @@ def __getattr__(cls, name):
 # built documents.
 #
 # The short X.Y version.
-version = '0.0'
+version = '0.1'
 # The full version, including alpha/beta/rc tags.
-release = '0.0.1'
+release = '0.1.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/create_your_own_brick.rst b/docs/create_your_own_brick.rst
new file mode 100644
index 00000000..d67b0ce5
--- /dev/null
+++ b/docs/create_your_own_brick.rst
@@ -0,0 +1,433 @@
+Create your own brick
+=====================
+
+.. doctest::
+    :hide:
+
+    >>> import numpy
+    >>>
+    >>> import theano
+    >>> from theano import tensor
+    >>>
+    >>> from blocks.bricks import Brick, Initializable, Linear, Feedforward
+    >>> from blocks.bricks.base import lazy, application
+    >>> from blocks.bricks.parallel import Parallel
+    >>> from blocks.initialization import Constant
+    >>> from blocks.roles import add_role, WEIGHT
+    >>> from blocks.utils import shared_floatx_nans
+
+This tutorial explains how to create a custom brick, which is useful if you
+want to group several specific operations (which can be bricks themselves)
+into a single one so that you can easily reuse it.
+
+The first part of this tutorial lists the requirements and optional
+components that a brick should/can implement, while the second part
+describes the construction of a simple toy brick.
+
+This tutorial assumes that you are already familiar with
+:doc:`bricks` and how to use them from a user's point of view.
+
+
+Bricks ingredients and recipe
+-----------------------------
+
+All the bricks in Blocks inherit directly or indirectly from
+:class:`.Brick`. There is already a rich inheritance hierarchy of
+bricks implemented in Blocks, so you should consider which brick to
+inherit from. Bear in mind that multiple inheritance is often possible and
+advocated whenever it makes sense.
+
+Here are examples of possible bricks to inherit from:
+
+* :class:`.Sequence`: a sequence of bricks.
+* :class:`.Initializable`: a brick that defines the same initialization
+  scheme (weights and biases) for all its children.
+* :class:`.Feedforward`: declares an interface for bricks with one input and
+  one output.
+* :class:`.Linear`: a linear transformation with optional bias. Inherits from
+  :class:`.Initializable` and :class:`.Feedforward`.
+* :class:`.BaseRecurrent`: the base class for recurrent bricks. Check the
+  :doc:`tutorial about rnns` for more information.
+* many more!
+
+Let's say that you want to create a brick from scratch, simply inheriting
+from :class:`.Brick`. You should then consider overriding the following
+methods; a sketch combining them follows after the list (strictly speaking,
+all these methods are optional, check the docstring of :class:`.Brick` for a
+precise description of the life-cycle of a brick):
+
+* :meth:`.Brick.__init__`: you should pass the attributes of your brick as
+  arguments. It is also in this method that you should create the potential
+  "children bricks" that belong to your brick (in that case, you have to put
+  the children bricks into ``self.children``). The initialization of the
+  attributes can be lazy, as described later in the tutorial.
+* :meth:`apply`: you need to implement a method that actually carries out
+  the operation of the brick, taking as arguments the inputs of the brick
+  and returning its outputs. It can have any name and for simple bricks is
+  often named ``apply``. You should decorate it with the
+  :func:`.application` decorator, as explained in the next section. If you
+  design a recurrent brick, you should instead decorate it with the
+  :func:`.recurrent` decorator, as explained in the
+  :doc:`tutorial about rnns`.
+* :meth:`.Brick._allocate`: you should implement this method to allocate the
+  shared variables (often representing parameters) of the brick. In Blocks,
+  by convention, the built-in bricks allocate their shared variables with
+  nan values, and we recommend you do the same.
+* :meth:`.Brick._initialize`: you should implement this method to initialize
+  the shared variables of your brick. This method is called after the
+  allocation.
+* :meth:`.Brick._push_allocation_config`: you should consider overriding
+  this method if you want to change the configuration of the children bricks
+  before they allocate their parameters.
+* :meth:`.Brick._push_initialization_config`: you should consider
+  overriding this method if you want to change the initialization schemes of
+  the children before they get initialized.
+  If the children bricks need to be initialized with the same scheme, then
+  you should inherit your brick from :class:`.Initializable`, which
+  automatically pushes the initialization schemes of your brick (provided as
+  arguments ``weights_init`` and ``biases_init`` of the constructor) to the
+  children bricks.
+* :meth:`.Brick.get_dim`: implementing this method is useful if you want
+  to provide a simple way to get the dimensions of the inputs and outputs of
+  the brick.
+
+If you want to inherit from a specific brick, check its docstring to
+identify the particular methods to override and the attributes to define.
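+
+To make this life-cycle concrete, here is a minimal sketch of a brick that
+overrides the methods listed above. The ``Scale`` brick and its single
+learned scalar are invented for this example; it inherits from
+:class:`.Initializable` so that ``weights_init`` is handled for us:
+
+    >>> class Scale(Initializable):
+    ...     """Multiply the input by a learned scalar (toy example)."""
+    ...     @lazy(allocation=['dim'])
+    ...     def __init__(self, dim, **kwargs):
+    ...         super(Scale, self).__init__(**kwargs)
+    ...         self.dim = dim
+    ...
+    ...     def _allocate(self):
+    ...         # By convention, allocate shared variables with nan values.
+    ...         s = shared_floatx_nans((1,), name='s')
+    ...         add_role(s, WEIGHT)
+    ...         self.parameters.append(s)
+    ...
+    ...     def _initialize(self):
+    ...         # Called after allocation; fills in actual initial values.
+    ...         s, = self.parameters
+    ...         self.weights_init.initialize(s, self.rng)
+    ...
+    ...     @application(inputs=['input_'], outputs=['output'])
+    ...     def apply(self, input_):
+    ...         s, = self.parameters
+    ...         return input_ * s
+    ...
+    ...     def get_dim(self, name):
+    ...         if name in ('input_', 'output'):
+    ...             return self.dim
+    ...         return super(Scale, self).get_dim(name)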
+
+Application methods
+~~~~~~~~~~~~~~~~~~~
+
+The :meth:`apply` method listed above is probably the most important method
+of your brick because it is the one that actually takes theano tensors as
+inputs, processes them and returns output tensors. You should decorate it
+with the :func:`.application` decorator, which names variables and registers
+auxiliary variables of the operation you implement. It is used as follows:
+
+    >>> class Foo(Brick):
+    ...     @application(inputs=['input1', 'input2'], outputs=['output'])
+    ...     def apply(self, input1, input2):
+    ...         y = input1 + input2
+    ...         return y
+
+In the case above, it will automatically rename the theano tensor variable
+``input1`` to ``foo_apply_input1``, ``input2`` to ``foo_apply_input2`` and
+the output of the method to ``foo_apply_output``. It will also add roles and
+names to the tag attributes of the variables, as shown below:
+
+    >>> foo = Foo()
+    >>> i1 = tensor.matrix('i1')
+    >>> i2 = tensor.matrix('i2')
+    >>> y = foo.apply(i1, i2)
+    >>> theano.printing.debugprint(y)
+    Elemwise{identity} [@A] 'foo_apply_output'
+     |Elemwise{add,no_inplace} [@B] ''
+       |Elemwise{identity} [@C] 'foo_apply_input1'
+       | |i1 [@D]
+       |Elemwise{identity} [@E] 'foo_apply_input2'
+         |i2 [@F]
+    >>> print(y.name)
+    foo_apply_output
+    >>> print(y.tag.name)
+    output
+    >>> print(y.tag.roles)
+    [OUTPUT]
+
+Under the hood, the ``@application`` decorator creates an object of class
+:class:`.Application`, named ``apply``, which becomes an attribute of the
+brick class (as opposed to class instances):
+
+    >>> print(type(Foo.apply))
+    <class 'blocks.bricks.base.Application'>
+
+
+Application properties
+""""""""""""""""""""""
+
+In the previous examples, the names of the arguments of the application
+methods were directly provided as arguments of the ``@application``
+decorator because they were common to all instances of the classes. On the
+other hand, if these names need to be defined differently for particular
+instances of the class, you should use the ``apply.property`` decorator.
+Let's say that we want to name the input of our application method with the
+string stored in ``self.fancy_name``; then we should write:
+
+    >>> class Foo(Brick): # doctest: +SKIP
+    ...     def __init__(self, fancy_name):
+    ...         self.fancy_name = fancy_name
+    ...     @application
+    ...     def apply(self, input_):
+    ...         ...
+    ...     @apply.property('inputs')
+    ...     def apply_inputs(self):
+    ...         # Note that you can use any python code to define the name
+    ...         return self.fancy_name
+
+Using application calls
+"""""""""""""""""""""""
+
+You may want to save particular variables defined in the ``apply`` method in
+order to use them later, for example to monitor them during training. For
+that, you need to pass ``application_call`` as an argument of your ``apply``
+method and use the ``add_auxiliary_variable`` function to register your
+variables of interest, as shown in this example:
+
+    >>> class Foo(Brick):
+    ...     @application
+    ...     def apply(self, x, application_call):
+    ...         application_call.add_auxiliary_variable(x.mean())
+    ...         return x + 1
+
+``add_auxiliary_variable`` annotates the variable ``x.mean()`` as an
+auxiliary variable, which you can later retrieve through the computational
+graph (:class:`.ComputationGraph`) and filters (:class:`.VariableFilter`).
+In the case of the ``Foo`` brick defined above, we retrieve ``x.mean()`` as
+follows:
+
+    >>> from blocks.graph import ComputationGraph
+    >>> x = tensor.fmatrix('x')
+    >>> y = Foo().apply(x)
+    >>> cg = ComputationGraph(y)
+    >>> print(cg.auxiliary_variables)
+    [mean]
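+
+Auxiliary variables can also be selected from the graph with
+:class:`.VariableFilter`, for instance by role; ``add_auxiliary_variable``
+tags its variable with the ``AUXILIARY`` role. A small sketch of this,
+reusing the graph ``cg`` built above:
+
+    >>> from blocks.filter import VariableFilter
+    >>> from blocks.roles import AUXILIARY
+    >>> # Select every variable tagged as auxiliary in the graph
+    >>> VariableFilter(roles=[AUXILIARY])(cg.variables)
+    [mean]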
+
+Lazy initialization
+~~~~~~~~~~~~~~~~~~~
+
+Instead of forcing the user to provide all the brick attributes as arguments
+to the :meth:`.Brick.__init__` method, you can let the user specify them
+later, after the creation of the brick. To enable this mechanism, called
+lazy initialization, you need to decorate the constructor with the
+:func:`.lazy` decorator:
+
+    >>> @lazy(allocation=['attr1', 'attr2']) # doctest: +SKIP
+    ... def __init__(self, attr1, attr2):
+    ...     ...
+
+This allows the user to specify ``attr1`` and ``attr2`` after the creation
+of the brick. For example, the following ``ChainOfTwoFeedforward`` brick is
+composed of two :class:`.Feedforward` bricks, and you do not need to specify
+the ``input_dim`` of ``brick2`` at its creation.
+
+    >>> class ChainOfTwoFeedforward(Feedforward):
+    ...     """Two sequential Feedforward bricks."""
+    ...     def __init__(self, brick1, brick2, **kwargs):
+    ...         super(ChainOfTwoFeedforward, self).__init__(**kwargs)
+    ...         self.brick1 = brick1
+    ...         self.brick2 = brick2
+    ...         self.children = [self.brick1, self.brick2]
+    ...
+    ...     @property
+    ...     def input_dim(self):
+    ...         return self.brick1.input_dim
+    ...
+    ...     @input_dim.setter
+    ...     def input_dim(self, value):
+    ...         self.brick1.input_dim = value
+    ...
+    ...     @property
+    ...     def output_dim(self):
+    ...         return self.brick2.output_dim
+    ...
+    ...     @output_dim.setter
+    ...     def output_dim(self, value):
+    ...         self.brick2.output_dim = value
+    ...
+    ...     def _push_allocation_config(self):
+    ...         self.brick2.input_dim = self.brick1.get_dim('output')
+    ...
+    ...     @application
+    ...     def apply(self, x):
+    ...         return self.brick2.apply(self.brick1.apply(x))
+
+Note how ``get_dim`` is used to retrieve the output dimension of ``brick1``,
+which is then pushed to ``brick2`` as its ``input_dim``. You can now use a
+``ChainOfTwoFeedforward`` brick as follows.
+
+    >>> brick1 = Linear(input_dim=3, output_dim=2, use_bias=False,
+    ...                 weights_init=Constant(2))
+    >>> brick2 = Linear(output_dim=4, use_bias=False,
+    ...                 weights_init=Constant(2))
+    >>>
+    >>> seq = ChainOfTwoFeedforward(brick1, brick2)
+    >>> seq.initialize()
+    >>> brick2.input_dim
+    2
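+
+Note that ``brick2`` was created without an ``input_dim``; thanks to the
+:func:`.lazy` decorator on :meth:`.Linear.__init__`, the attribute is only
+filled in later, here by ``_push_allocation_config`` during
+``seq.initialize()``. A lazily omitted attribute can just as well be set by
+hand before triggering initialization. A small sketch of this (the
+``lazy_brick`` name is ours, not part of the tutorial's running example):
+
+    >>> lazy_brick = Linear(output_dim=4, use_bias=False,
+    ...                     weights_init=Constant(1))
+    >>> lazy_brick.input_dim = 3  # provided after creation
+    >>> lazy_brick.initialize()
+    >>> lazy_brick.parameters[0].get_value().shape
+    (3, 4)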
+
+
+Example
+-------
+
+For the sake of the tutorial, let's consider a toy operation that takes two
+batch inputs and multiplies them respectively by two matrices, resulting in
+two outputs.
+
+The first step is to identify which brick to inherit from. Clearly we are
+implementing a variant of the :class:`.Linear` brick. Unlike
+:class:`.Linear`, ours has two inputs and two outputs, which means that we
+cannot inherit from :class:`.Feedforward`, which requires a single input and
+a single output. Our brick will have to manage two shared variables
+representing the matrices to multiply the inputs with. As we want to
+initialize them with the same scheme, we should inherit from
+:class:`.Initializable`, which automatically pushes the initialization
+schemes to the children. The initialization schemes are provided as
+arguments ``weights_init`` and ``biases_init`` of the constructor of our
+brick (in the ``kwargs``).
+
+    >>> class ParallelLinear(Initializable):
+    ...     r"""Two linear transformations without biases.
+    ...
+    ...     Brick which applies two linear (affine) transformations by
+    ...     multiplying its two inputs with two weight matrices, resulting
+    ...     in two outputs.
+    ...     The two inputs, weights and outputs can have different
+    ...     dimensions.
+    ...
+    ...     Parameters
+    ...     ----------
+    ...     input_dim{1,2} : int
+    ...         The dimensions of the two inputs.
+    ...     output_dim{1,2} : int
+    ...         The dimensions of the two outputs.
+    ...     """
+    ...     @lazy(allocation=['input_dim1', 'input_dim2',
+    ...                       'output_dim1', 'output_dim2'])
+    ...     def __init__(self, input_dim1, input_dim2, output_dim1,
+    ...                  output_dim2, **kwargs):
+    ...         super(ParallelLinear, self).__init__(**kwargs)
+    ...         self.input_dim1 = input_dim1
+    ...         self.input_dim2 = input_dim2
+    ...         self.output_dim1 = output_dim1
+    ...         self.output_dim2 = output_dim2
+    ...
+    ...     def __allocate(self, input_dim, output_dim, number):
+    ...         W = shared_floatx_nans((input_dim, output_dim),
+    ...                                name='W' + number)
+    ...         add_role(W, WEIGHT)
+    ...         self.parameters.append(W)
+    ...         self.add_auxiliary_variable(W.norm(2),
+    ...                                     name='W' + number + '_norm')
+    ...
+    ...     def _allocate(self):
+    ...         self.__allocate(self.input_dim1, self.output_dim1, '1')
+    ...         self.__allocate(self.input_dim2, self.output_dim2, '2')
+    ...
+    ...     def _initialize(self):
+    ...         W1, W2 = self.parameters
+    ...         self.weights_init.initialize(W1, self.rng)
+    ...         self.weights_init.initialize(W2, self.rng)
+    ...
+    ...     @application(inputs=['input1_', 'input2_'],
+    ...                  outputs=['output1', 'output2'])
+    ...     def apply(self, input1_, input2_):
+    ...         """Apply the two linear transformations.
+    ...
+    ...         Parameters
+    ...         ----------
+    ...         input{1,2}_ : :class:`~tensor.TensorVariable`
+    ...             The two inputs on which to apply the transformations
+    ...
+    ...         Returns
+    ...         -------
+    ...         output{1,2} : :class:`~tensor.TensorVariable`
+    ...             The two inputs multiplied by their respective matrices
+    ...
+    ...         """
+    ...         W1, W2 = self.parameters
+    ...         output1 = tensor.dot(input1_, W1)
+    ...         output2 = tensor.dot(input2_, W2)
+    ...         return output1, output2
+    ...
+    ...     def get_dim(self, name):
+    ...         if name == 'input1_':
+    ...             return self.input_dim1
+    ...         if name == 'input2_':
+    ...             return self.input_dim2
+    ...         if name == 'output1':
+    ...             return self.output_dim1
+    ...         if name == 'output2':
+    ...             return self.output_dim2
+    ...         return super(ParallelLinear, self).get_dim(name)
+
+You can test the brick as follows:
+
+    >>> input_dim1, input_dim2, output_dim1, output_dim2 = 10, 5, 2, 1
+    >>> batch_size1, batch_size2 = 1, 2
+    >>>
+    >>> x1_mat = 3 * numpy.ones((batch_size1, input_dim1),
+    ...                         dtype=theano.config.floatX)
+    >>> x2_mat = 4 * numpy.ones((batch_size2, input_dim2),
+    ...                         dtype=theano.config.floatX)
+    >>>
+    >>> x1 = theano.tensor.matrix('x1')
+    >>> x2 = theano.tensor.matrix('x2')
+    >>> parallel1 = ParallelLinear(input_dim1, input_dim2, output_dim1,
+    ...                            output_dim2, weights_init=Constant(2))
+    >>> parallel1.initialize()
+    >>> output1, output2 = parallel1.apply(x1, x2)
+    >>>
+    >>> f1 = theano.function([x1, x2], [output1, output2])
+    >>> f1(x1_mat, x2_mat) # doctest: +ELLIPSIS
+    [array([[ 60.,  60.]]...), array([[ 40.],
+           [ 40.]]...)]
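+
+The ``get_dim`` method we implemented above gives a uniform way to query the
+dimensions of the brick's inputs and outputs by name:
+
+    >>> parallel1.get_dim('input1_')
+    10
+    >>> parallel1.get_dim('output2')
+    1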
+
+One can also create the brick using two :class:`.Linear` children bricks,
+which spares us the manual allocation and initialization of the parameters:
+
+    >>> class ParallelLinear2(Initializable):
+    ...     def __init__(self, input_dim1, input_dim2, output_dim1,
+    ...                  output_dim2, **kwargs):
+    ...         super(ParallelLinear2, self).__init__(**kwargs)
+    ...         self.linear1 = Linear(input_dim1, output_dim1,
+    ...                               use_bias=False, **kwargs)
+    ...         self.linear2 = Linear(input_dim2, output_dim2,
+    ...                               use_bias=False, **kwargs)
+    ...         self.children = [self.linear1, self.linear2]
+    ...
+    ...     @application(inputs=['input1_', 'input2_'],
+    ...                  outputs=['output1', 'output2'])
+    ...     def apply(self, input1_, input2_):
+    ...         output1 = self.linear1.apply(input1_)
+    ...         output2 = self.linear2.apply(input2_)
+    ...         return output1, output2
+    ...
+    ...     def get_dim(self, name):
+    ...         if name == 'input1_':
+    ...             return self.linear1.get_dim('input_')
+    ...         if name == 'output1':
+    ...             return self.linear1.get_dim('output')
+    ...         if name == 'input2_':
+    ...             return self.linear2.get_dim('input_')
+    ...         if name == 'output2':
+    ...             return self.linear2.get_dim('output')
+    ...         return super(ParallelLinear2, self).get_dim(name)
+
+You can test this new version as follows:
+
+    >>> parallel2 = ParallelLinear2(input_dim1, input_dim2, output_dim1,
+    ...                             output_dim2, weights_init=Constant(2))
+    >>> parallel2.initialize()
+    >>> # The weights_init initialization scheme is pushed to the children
+    >>> # bricks. We can verify it as follows.
+    >>> w = parallel2.weights_init
+    >>> w0 = parallel2.children[0].weights_init
+    >>> w1 = parallel2.children[1].weights_init
+    >>> print(w == w0 == w1)
+    True
+    >>>
+    >>> output1, output2 = parallel2.apply(x1, x2)
+    >>>
+    >>> f2 = theano.function([x1, x2], [output1, output2])
+    >>> f2(x1_mat, x2_mat) # doctest: +ELLIPSIS
+    [array([[ 60.,  60.]]...), array([[ 40.],
+           [ 40.]]...)]
+
+Actually, it was not even necessary to create a custom brick for this
+particular operation, as Blocks already has a brick called
+:class:`.Parallel` which applies the same prototype brick to several inputs.
+In our case, the prototype brick we want to apply to our two inputs is a
+:class:`.Linear` brick with no bias:
+
+    >>> parallel3 = Parallel(
+    ...     prototype=Linear(use_bias=False),
+    ...     input_names=['input1_', 'input2_'],
+    ...     input_dims=[input_dim1, input_dim2],
+    ...     output_dims=[output_dim1, output_dim2],
+    ...     weights_init=Constant(2))
+    >>> parallel3.initialize()
+    >>>
+    >>> output1, output2 = parallel3.apply(x1, x2)
+    >>>
+    >>> f3 = theano.function([x1, x2], [output1, output2])
+    >>> f3(x1_mat, x2_mat) # doctest: +ELLIPSIS
+    [array([[ 60.,  60.]]...), array([[ 40.],
+           [ 40.]]...)]
diff --git a/docs/index.rst b/docs/index.rst
index b16e395e..25655bde 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -46,6 +46,7 @@ In-depth
    rnn
    configuration
+   create_your_own_brick
    serialization
    api/index.rst
    development/index.rst