Adding README

summanlp · Mar 31, 2015 · cb87ba5 · cb87ba5
1 parent 32e7c81
commit cb87ba5
Show file tree

Hide file tree

Showing 8 changed files with 91 additions and 30 deletions.
diff --git a/MANIFEST b/MANIFEST
@@ -1,4 +1,5 @@
 # file GENERATED by distutils, do NOT edit
+README
 setup.cfg
 setup.py
 summa/__init__.py
@@ -15,20 +16,3 @@ summa/preprocessing/__init__.py
 summa/preprocessing/porter.py
 summa/preprocessing/snowball.py
 summa/preprocessing/textcleaner.py
-summa/views/celegans.gexf
-summa/views/config.js
-summa/views/index.html
-summa/views/miserables.gexf
-summa/views/sentences.gexf
-summa/views/words.gexf
-summa/views/img/fleches-horiz.png
-summa/views/img/gephi.png
-summa/views/img/loupe-edges.png
-summa/views/img/plusmoins.png
-summa/views/img/search.gif
-summa/views/js/gexfjs.js
-summa/views/js/jquery-2.0.2.min.js
-summa/views/js/jquery-ui-1.10.3.custom.min.js
-summa/views/js/jquery.mousewheel.min.js
-summa/views/styles/gexfjs.css
-summa/views/styles/jquery-ui-1.10.3.custom.min.css
diff --git a/README b/README
@@ -0,0 +1,79 @@
+==============================================
+summa - textrank
+==============================================
+
+TextRank_ implementation for text summarization and keyword extraction in Python
+
+.. _TextRank: http://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
+
+Features
+---------
+
+* Text summarization
+* Keyword extraction
+* Text modeling as a graph, with export to GEXF
+
+Examples
+--------
+
+Text summarization::
+
+ >>> text = "Automatic summarization is the process of reducing a text document with a
+ computer program in order to create a summary that retains the most important points
+ of the original document. As the problem of information overload has grown, and as
+ the quantity of data has increased, so has interest in automatic summarization.
+ Technologies that can make a coherent summary take into account variables such as
+ length, writing style and syntax. An example of the use of summarization technology
+ is search engines such as Google. Document summarization is another."
+
+ >>> from summa import summarizer
+ >>> print summarizer.summarize(text)
+ 'Automatic summarization is the process of reducing a text document with a computer
+ program in order to create a summary that retains the most important points of the
+ original document.'
+
+
+Keyword extraction::
+
+ >>> from summa import keywords
+ >>> print keywords.keywords(text)
+ document
+ automatic summarization
+ technologies
+ technology
+
+
+Installation
+--------------
+
+This software depends on `NumPy and SciPy <http://www.scipy.org/Download>`_, two Python packages for scientific computing.
+You must have them installed prior to installing `summa`::
+
+ pip install summa
+
+
+If you are going to use the export function, you also need `NetworkX <https://networkx.github.io/download.html>`_.
+For better keyword extraction performance, install `Pattern <http://www.clips.ua.ac.be/pattern>`_.
+
+This version has been tested under Python 2.7.
+
+
+More examples
+-------------
+
+Command-line usage::
+
+ cd path/to/folder/summa/
+ python textrank.py -t FILE
+
+
+Export::
+
+ >>> from summa import export
+ >>> export.gexf_export(text, path="graph.gexf")
+
+
+-------------
+
+Summa is open source software released under `The MIT License (MIT) <http://opensource.org/licenses/MIT>`_.
+Copyright (c) 2014 - now Summa NLP
diff --git a/README.rst b/README.rst
diff --git a/setup.cfg b/setup.cfg
@@ -1,2 +1,2 @@
 [metadata]
-description-file = README.md
+description-file = README
diff --git a/setup.py b/setup.py
@@ -5,14 +5,14 @@
  name = 'summa',
  packages = ['summa', 'summa.preprocessing'],
  package_data = {
- 'summa': ['views/*', 'views/img/*', 'views/js/*', 'views/styles/*']
+ 'summa': ['README', 'LICENSE']
  },
- version = '0.0.1',
+ version = '0.0.4',
  description = 'A text summarization and keyword extraction package',
  author = 'Federico Barrios, Federico Lopez',
  author_email = '[email protected]',
  url = 'https:/summanlp/textrank',
- download_url = 'https:/summanlp/textrank/tarball/v0.0.2',
+ download_url = 'https:/summanlp/textrank/tarball/v0.0.3',
  keywords = ['summa', 'nlp', 'summarization', "NLP", "natural language processing", "automatic summarization",
  "keywords", "summary", "textrank", "pagerank"],
  classifiers = [
@@ -34,5 +34,5 @@
  # that you indicate whether you support Python 2, Python 3 or both.
  'Programming Language :: Python :: 2.7'
  ],
- long_description = open('README.rst').read()
+ long_description = open('README').read()
 )
diff --git a/summa/export.py b/summa/export.py
@@ -27,7 +27,7 @@ def _write_gexf(graph, scores, path="test.gexf", labels=None):
  nx.write_gexf(nx_graph, path)
  shell("sed -i 's/<ns0/<viz/g' {0}".format(path))
  shell('echo \'<?xml version="1.0" encoding="UTF-8"?>\' | cat - {0} > out.tmp && mv out.tmp {0}'.format(path))
- shell("mv {0} views/{0}".format(path))
+ #shell("mv {0} views/{0}".format(path))
 
 
 def _get_nx_graph(graph):

diff --git a/summa/pagerank_weighted.py b/summa/pagerank_weighted.py
@@ -2,7 +2,10 @@
 from scipy.sparse import csr_matrix
 from scipy.linalg import eig
 from numpy import empty as empty_matrix
-import pdb
+
+from numpy import VisibleDeprecationWarning
+import warnings
+warnings.filterwarnings("ignore", category=VisibleDeprecationWarning)
 
 CONVERGENCE_THRESHOLD = 0.0001
 

diff --git a/summa/textrank.py b/summa/textrank.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python -W ignore::DeprecationWarning
 
 import sys, getopt
 from summarizer import summarize