Skip to content

Commit 9efa489

Browse files
committed
Add functional Flask application
1 parent e07fbb8 commit 9efa489

File tree

13 files changed

+295
-20
lines changed

13 files changed

+295
-20
lines changed

init_celery.sh

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/sh
# Starts the Celery worker that runs the NEAM annotation tasks.
# Run from the repository root so that .venv/ and the neam package resolve.

# `source` is a bash extension; `.` is the POSIX spelling and works under /bin/sh.
. .venv/bin/activate

# Global options such as -A must come before the sub-command; this ordering is
# accepted by both older and newer Celery command-line interfaces.
celery -A neam.python.app.celery worker -E --loglevel=info
4+

neam/python/app/__init__.py

+54
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,60 @@
11
from flask import Flask
2+
from celery import Celery
3+
import os
24

5+
6+
def make_celery(app):
    """
    Builds a Celery instance whose tasks run inside the Flask application context

    :param app: A Flask application whose config provides CELERY_RESULT_BACKEND
                and CELERY_BROKER_URL
    :return: A Celery instance configured from the application's config
    """
    config = app.config
    worker = Celery(
        app.import_name,
        backend=config['CELERY_RESULT_BACKEND'],
        broker=config['CELERY_BROKER_URL'],
    )
    worker.conf.update(config)

    base_task = worker.Task

    class ContextTask(base_task):
        def __call__(self, *args, **kwargs):
            # Push an application context around the task body
            with app.app_context():
                return self.run(*args, **kwargs)

    # Every task created from this instance now uses the context-aware base class
    worker.Task = ContextTask
    return worker
27+
28+
29+
# Initialize the application and configure it
330
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = '/tmp/'
app.config['CELERY_BROKER_URL'] = 'redis://localhost:6379/0'
app.config['CELERY_RESULT_BACKEND'] = 'redis://localhost:6379/0'

celery = make_celery(app)


@celery.task(bind=True)
def neam_annotate(self, filename, email):
    """
    Annotates a document with NEAM

    TODO: Ensure the file exists

    :param filename: The name of the file to annotate, relative to UPLOAD_FOLDER
    :param email: An email to send the annotated file to (not used yet)
    :return: A dict whose 'result' key holds the name of the annotated file
    """
    # Imported inside the task, as before, so the NEAM pipeline is only loaded
    # when a task actually runs rather than when the web application starts.
    from neam.python.neam import main as neam

    upload_dir = app.config['UPLOAD_FOLDER']
    new_file = filename + '.xml'

    self.update_state(state='PROGRESS', meta={})

    # Explicit UTF-8 matches how the command-line entry point in neam.py opens
    # its input, instead of depending on the worker's locale.
    with open(os.path.join(upload_dir, filename), encoding='utf-8') as f, \
            open(os.path.join(upload_dir, new_file), 'w', encoding='utf-8') as out:
        out.write(neam(f))

    return {'result': new_file}


# Must stay at the bottom: routes.py does
# `from neam.python.app import app, celery, neam_annotate`, so all three names
# have to exist on this package before the routes module is executed.
from neam.python.app import routes  # noqa: E402,F401
60+

neam/python/app/routes.py

+70-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,75 @@
1-
from neam.python.app import app
1+
import os
2+
3+
from flask import render_template, request, send_from_directory, jsonify, url_for
4+
from neam.python.app import app, celery, neam_annotate
5+
26

37
@app.route('/')
@app.route('/index')
def index():
    """ Renders the index page, which is served for both / and /index """
    page_title = 'NEAM Annotate'
    return render_template('index.html', title=page_title)
12+
13+
14+
@app.route('/annotate', methods=['POST'])
def annotate():
    """
    Annotates a document

    TODO: Add validation of the email address and the file contents

    :return: An HTTP 202 response, where the Location key corresponds to the URI to check on
             the annotation process, or an HTTP 400 response if the upload has no usable
             file name
    """
    # Grab the data from the request
    email = request.form['email']
    f = request.files['file']

    # The file name is chosen by the client. Keep only its last path component
    # (treating backslashes as separators too) so that a name such as
    # "../../etc/passwd" cannot be written outside of the upload folder.
    filename = os.path.basename((f.filename or '').replace('\\', '/'))
    if filename in ('', '.', '..'):
        return jsonify({'error': 'A file with a valid name is required'}), 400

    # Save the file so the worker can find it
    f.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

    # Fire off a worker to annotate the file
    t = neam_annotate.delay(filename, email)

    return jsonify({}), 202, {'Location': url_for('taskstatus', task_id=t.id)}
35+
36+
37+
@app.route('/status/<task_id>')
def taskstatus(task_id):
    """
    Checks on the status of a worker

    :param task_id: The ID of the worker
    :type task_id: str
    :return: A JSON object with the task's 'state', a human-readable 'status' and, once
             the task has produced one, the 'result' (name of the annotated file)
    """
    # Find the task
    # TODO: handle invalid queries
    task = neam_annotate.AsyncResult(task_id)

    # Read each value once so the branches below all see the same snapshot
    state = task.state
    info = task.info

    # Set the values on a response object
    response = {'state': state}
    if state == 'PENDING':
        response['status'] = 'Pending'
    elif state != 'FAILURE':
        # info is the dict passed to update_state() or returned by the task;
        # fall back to an empty dict if the backend handed back anything else.
        details = info if isinstance(info, dict) else {}
        response['status'] = details.get('status', '')
        if 'result' in details:
            response['result'] = details['result']
    else:
        # On failure, info holds the raised exception
        response['status'] = str(info)

    return jsonify(response)
62+
63+
64+
@app.route('/download/<filename>')
def download(filename):
    """
    Sends a file from the upload folder to the client as an attachment

    TODO: validate the input

    :param filename: The name of the file to download
    :return: The requested file
    """
    upload_dir = os.path.join(app.config['UPLOAD_FOLDER'])
    return send_from_directory(upload_dir, filename, as_attachment=True)
775

neam/python/app/static/css/style.css

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
/* Render <strong> text bolder than the weight inherited from its parent. */
strong {
    font-weight: bolder;
}

/* Inner spacing for elements carrying the "box" class. */
.box {
    padding: 2rem;
}

neam/python/app/static/js/index.js

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Wires up the annotation form once the DOM is ready: hides the progress
// indicators, submits the form via AJAX and starts polling the status URL
// returned in the response's Location header.
$(() => {
    const elements = {
        progress_bar: $('#progress-bar'),
        button: $('#submit-button'),
        text: $('#info-text')
    };

    elements['progress_bar'].hide();
    elements['text'].hide();

    $('#annotation-form').ajaxForm({
        beforeSubmit() {
            elements['progress_bar'].show();
            elements['text'].show();
            elements['button'].addClass('disabled');
        },
        success(data, textStatus, request) {
            // Declared locally; the bare assignment used before created an
            // implicit global (and throws in strict mode).
            const status_url = request.getResponseHeader('Location');
            update_progress(status_url, elements);
        },
        error() {
            // The upload itself was rejected: restore the form so the user
            // can try again instead of being left with a disabled button.
            elements['progress_bar'].hide();
            elements['text'].hide();
            elements['button'].removeClass('disabled');
        }
    });
});
23+
24+
/**
 * Polls the task status endpoint every two seconds until the task finishes,
 * then either starts the download of the annotated file or reports the failure.
 *
 * @param {string} status_url URL of the status endpoint for the task
 * @param {Object} elements   jQuery handles: progress_bar, button and text
 */
function update_progress(status_url, elements) {
    // States in which the task has not finished yet and polling must continue
    const in_flight = ['PENDING', 'PROGRESS', 'STARTED', 'RETRY'];

    const reset_form = () => {
        elements['progress_bar'].hide();
        elements['text'].hide();
        elements['button'].removeClass('disabled');
    };

    $.getJSON(status_url, data => {
        const state = data['state'];

        if (in_flight.indexOf(state) !== -1) {
            setTimeout(() => {
                update_progress(status_url, elements);
            }, 2000);
            return;
        }

        if (state === 'SUCCESS' && data['result']) {
            // Encode the name so characters such as spaces or '#' survive in the URL
            window.location = '/download/' + encodeURIComponent(data['result']);
        } else {
            // Previously a failed task redirected to /download/undefined
            alert('Annotation failed: ' + (data['status'] || state));
        }
        reset_form();
    }).fail(reset_form);  // the status request itself failed: stop and restore the form
}
40+

neam/python/app/templates/index.html

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{% extends "layout.html" %}

{# Upload form for a document to annotate; submission and progress polling are handled by js/index.js. #}
{% block content %}
<h1 class="center-align">NEAM</h1>
<div class="row box z-depth-2">
    <form id="annotation-form" class="col s12" method="POST" action="/annotate" enctype="multipart/form-data">
        <div class="row">
            <div class="file-field input-field col s12">
                <div class="btn waves-effect waves-light">
                    <span>File</span>
                    <input name="file" type="file" required />
                </div>
                <div class="file-path-wrapper">
                    <input placeholder="Upload a file to be annotated" class="file-path" type="text" />
                </div>
            </div>
        </div>
        <div class="row">
            <div class="input-field col s12">
                {# The id is what the label's "for" attribute refers to; without it the label is not tied to the input. #}
                <input id="email" name="email" type="email" />
                <label for="email">Email</label>
                <span class="helper-text">If you don't want to wait, we'll send the annotation here when it's finished.</span>
            </div>
        </div>
        <div class="row">
            <button id="submit-button" class="btn waves-effect waves-light right" type="submit">
                Annotate
                <i class="material-icons right">send</i>
            </button>
        </div>
    </form>
    <div class="progress" id="progress-bar">
        <div class="indeterminate"></div>
    </div>
    <p class="right-align red-text" id="info-text">Hold tight; we're working on your annotation!</p>
</div>
{% endblock %}

{% block scripts %}
<script src="{{ url_for('static', filename='js/index.js') }}"></script>
{% endblock %}
42+

neam/python/app/templates/layout.html

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<!doctype html>
{# Base layout: child templates fill the "content" and "scripts" blocks and supply "title". #}
<html lang="en">
    <head>
        <!-- Declare the encoding and a responsive viewport before any content -->
        <meta charset="utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1.0" />
        <title>{{ title }}</title>

        <!-- Materialize stylesheet -->
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0-beta/css/materialize.min.css" />
        <!-- Materialize icons -->
        <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
        <!-- App-specific stylesheet -->
        <link href="{{ url_for('static', filename='css/style.css') }}" rel="stylesheet" />
    </head>
    <body>
        <!-- <nav>
            <div class="nav-wrapper">
                <ul id="nav-mobile" class="left hide-on-med-and-down">
                    <li><a>Link</a></li>
                </ul>
            </div>
        </nav> -->

        <div class="container">
            {% block content %}{% endblock %}
        </div>

        <!-- <footer class="page-footer">
            <div class="container">
                <h5>Footer Content</h5>
            </div>
            <div class="footer-copyright">
                <div class="container">
                    © 2018 Copyright Text
                </div>
            </div>
        </footer> -->

        <!-- Materialize JS -->
        <script src="https://cdnjs.cloudflare.com/ajax/libs/materialize/1.0.0-beta/js/materialize.min.js"></script>
        <!-- jQuery (explicit https, like every other asset, instead of a protocol-relative URL) -->
        <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
        <!-- jQuery AJAX form -->
        <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.form/4.2.2/jquery.form.min.js"></script>

        {% block scripts %}{% endblock %}
    </body>
</html>
47+
+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{% extends "layout.html" %}

{# Confirmation page shown while a document is being annotated. Reads `file` and `email` from the template context. #}
{% block content %}
<div class="container z-depth-2" style="padding: 2rem;">
    <h1>Your document is processing</h1>
    <p>Thanks! Processing of <strong>{{ file }}</strong> will take a few minutes. We'll email the annotated document to <strong>{{ email }}</strong> when it's done!</p>
</div>
{% endblock %}
9+

neam/python/classification/classifier.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
from neam.python.java import clms, java
1+
from neam.python.java import clms, java, boot_java
22
from neam.python.classification.processing import NEAMProcessor
33

4-
NEAMClassifier = clms.neam.classify.NEAMClassifier
5-
64
CORE_NLP_DEFAULTS = {
75
'annotators': 'tokenize,ssplit,pos,lemma,ner,entitymentions',
86
'ner.applyNumericClassifiers': 'false',
@@ -25,6 +23,7 @@
2523

2624
class Classifier(NEAMProcessor):
2725
def __init__(self, options = None, tags = None):
26+
boot_java()
2827
props = CORE_NLP_DEFAULTS.copy()
2928
if options:
3029
props.update(options)
@@ -33,7 +32,7 @@ def __init__(self, options = None, tags = None):
3332
tags = tags or DEFAULT_TAGS
3433
tags = self._convert_props(tags)
3534

36-
self._classifier = NEAMClassifier(core_nlp_props, tags)
35+
self._classifier = clms.neam.classify.NEAMClassifier(core_nlp_props, tags)
3736
self._preprocesses = []
3837
self._postprocesses = []
3938

neam/python/java.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626

2727
def boot_java():
28+
print("Starting Java.")
2829
src_path = os.path.join(java_dir, 'neam')
2930
jar_paths = [os.path.join(lib_dir, jar) for jar in JARS]
3031
load_paths = [src_path] + jar_paths
@@ -50,7 +51,6 @@ def install_corenlp():
5051

5152

5253
install_corenlp()
53-
boot_java()
5454
clms = JPackage('clms')
5555

5656
__all__ = ['java', 'clms']

neam/python/neam.py

+11-13
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
from neam.python.classification import *
33

44

5-
def main():
6-
args = load_args()
7-
5+
def main(input_file, model=None, year=1900):
86
pipeline = Pipeline([
97
#################
108
# Preprocessing #
@@ -18,7 +16,7 @@ def main():
1816
###########
1917

2018
# Run Stanford CoreNLP to tag named entities and dates
21-
load_classifier(args),
19+
load_classifier(model),
2220
# Tag all of the titles using a custom trained MaxEnt classifier
2321
TitleAnnotator(),
2422
# Replace page numbers with <pb> tags
@@ -35,7 +33,7 @@ def main():
3533
# Move any of the following titles inside tags that occur directly to their right
3634
TagExpander(tags=['persName'], words=['the', 'Mr.', 'Mrs.', 'Ms.', 'Miss', 'Lady', 'Dr.', 'Maj.', 'Col.', 'Capt.', 'Rev', 'SS', 'S.S.', 'Contessa', 'Judge']),
3735
# Add in the <p> and <div> tags
38-
JournalShaper('EBA', args.year),
36+
JournalShaper('EBA', year),
3937
# Check tags against Wikipedia
4038
WikiRetagger(tags=['placeName', 'orgName']),
4139
# Set the ref attribute of named entity tags
@@ -51,16 +49,14 @@ def main():
5149
Beautifier()
5250
])
5351

54-
with open(args.file, encoding="utf-8") as input_file:
55-
text = ''.join(input_file)
56-
57-
print(pipeline.run(text))
52+
text = ''.join(input_file)
53+
return pipeline.run(text)
5854

5955

60-
def load_classifier(args):
56+
def load_classifier(model):
    """
    Builds the classifier step of the pipeline

    :param model: Value for the "ner.model" classifier option; when falsy, no
                  option is set and the Classifier's own defaults apply
    :return: A Classifier constructed with the resulting options
    """
    props = {"ner.model": model} if model else {}
    return Classifier(props)
6561

6662

@@ -74,5 +70,7 @@ def load_args():
7470

7571

7672
if __name__ == '__main__':
    # Command-line entry point: annotate the file named by the arguments and
    # print the result to stdout.
    args = load_args()
    with open(args.file, encoding="utf-8") as input_file:
        # Forward the year as well; otherwise main() falls back to its default
        # of 1900 and the value given on the command line is silently ignored.
        print(main(input_file, args.model, args.year))
7876

0 commit comments

Comments
 (0)