Skip to content

Commit 674c986

Browse files
manning authored and Stanford NLP committed
Merge remote-tracking branch 'refs/remotes/origin/master'
1 parent de78e10 commit 674c986

22 files changed

+22
-20
lines changed

itest/src/edu/stanford/nlp/pipeline/POSTaggerBenchmarkITest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public void testEnglishBiDirectionalPOSModelAccuracy() {
8282
String englishPOSTestPath = "/u/nlp/data/pos-tagger/english/test-wsj-22-24";
8383
List<String> sentences = readInPOSData(englishPOSTestPath);
8484
double ENGLISH_BIDIRECTIONAL_TOKEN_ACCURACY = .972;
85-
double ENGLISH_BIDIRECTIONAL_SENTENCE_ACCURACY = .564;
85+
double ENGLISH_BIDIRECTIONAL_SENTENCE_ACCURACY = .563;
8686
runPOSTest(sentences, "_", englishBiDirectionalPipeline,
8787
ENGLISH_BIDIRECTIONAL_TOKEN_ACCURACY, ENGLISH_BIDIRECTIONAL_SENTENCE_ACCURACY,
8888
"English BiDirectional", false);
-1.62 MB
Binary file not shown.

lib/lucene-analyzers-common-7.5.0.jar

1.58 MB
Binary file not shown.

lib/lucene-core-4.10.3.jar

-2.44 MB
Binary file not shown.

lib/lucene-core-7.5.0.jar

2.91 MB
Binary file not shown.

lib/lucene-demo-4.10.3.jar

-49.5 KB
Binary file not shown.

lib/lucene-demo-7.5.0.jar

42.9 KB
Binary file not shown.

lib/lucene-queries-4.10.3.jar

-208 KB
Binary file not shown.

lib/lucene-queries-7.5.0.jar

259 KB
Binary file not shown.

lib/lucene-queryparser-4.10.3.jar

-382 KB
Binary file not shown.

lib/lucene-queryparser-7.5.0.jar

373 KB
Binary file not shown.

libsrc/lucene-4.10.3-src.tgz

-28 MB
Binary file not shown.
1.56 MB
Binary file not shown.

libsrc/lucene-core-7.5.0-sources.jar

2.01 MB
Binary file not shown.

libsrc/lucene-demo-7.5.0-sources.jar

38.3 KB
Binary file not shown.
149 KB
Binary file not shown.
379 KB
Binary file not shown.

pom.xml

+4-4
Original file line numberDiff line numberDiff line change
@@ -57,25 +57,25 @@
5757
<dependency>
5858
<groupId>org.apache.lucene</groupId>
5959
<artifactId>lucene-queryparser</artifactId>
60-
<version>4.10.3</version>
60+
<version>7.5.0</version>
6161
</dependency>
6262

6363
<dependency>
6464
<groupId>org.apache.lucene</groupId>
6565
<artifactId>lucene-analyzers-common</artifactId>
66-
<version>4.10.3</version>
66+
<version>7.5.0</version>
6767
</dependency>
6868

6969
<dependency>
7070
<groupId>org.apache.lucene</groupId>
7171
<artifactId>lucene-queries</artifactId>
72-
<version>4.10.3</version>
72+
<version>7.5.0</version>
7373
</dependency>
7474

7575
<dependency>
7676
<groupId>org.apache.lucene</groupId>
7777
<artifactId>lucene-core</artifactId>
78-
<version>4.10.3</version>
78+
<version>7.5.0</version>
7979
</dependency>
8080

8181
<dependency>

src/edu/stanford/nlp/patterns/LuceneSentenceIndex.java

+6-5
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public class LuceneSentenceIndex<E extends Pattern> extends SentenceIndex<E> {
6868
// }
6969

7070
//StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_42);
71-
IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_42, analyzer);
71+
IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
7272
DirectoryReader reader = null;
7373
IndexSearcher searcher;
7474
ProtobufAnnotationSerializer p = new ProtobufAnnotationSerializer();
@@ -82,7 +82,7 @@ public LuceneSentenceIndex(Properties props, Set<String> stopWords, String index
8282

8383

8484
void setIndexReaderSearcher() throws IOException {
85-
FSDirectory index = FSDirectory.open(indexDir);
85+
FSDirectory index = FSDirectory.open(indexDir.toPath());
8686
if(reader == null){
8787
reader = DirectoryReader.open(index);
8888
searcher = new IndexSearcher(reader);
@@ -126,16 +126,17 @@ void setIndexReaderSearcher() throws IOException {
126126
* **/
127127
Set<String> queryIndexGetSentences(CollectionValuedMap<String, String> words) throws IOException, ParseException {
128128
setIndexReaderSearcher();
129-
BooleanQuery query = new BooleanQuery();
129+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
130130
String pkey = Token.getKeyForClass(PatternsAnnotations.ProcessedTextAnnotation.class);
131131

132132
for(Map.Entry<String, Collection<String>> en: words.entrySet()){
133133
boolean processedKey = en.getKey().equals(pkey);
134134
for(String en2: en.getValue()){
135135
if(!processedKey || !stopWords.contains(en2.toLowerCase()))
136-
query.add(new BooleanClause(new TermQuery(new Term(en.getKey(), en2)), BooleanClause.Occur.MUST));
136+
builder.add(new BooleanClause(new TermQuery(new Term(en.getKey(), en2)), BooleanClause.Occur.MUST));
137137
}
138138
}
139+
BooleanQuery query = builder.build();
139140
//query.add(new BooleanClause(new TermQuery(new Term("textannotation","sonal")), BooleanClause.Occur.MUST));
140141

141142
// String queryStr = "";
@@ -304,7 +305,7 @@ public void update(List<CoreLabel> tokens, String sentid) {
304305

305306
void setIndexWriter() {try{
306307
if(indexWriter == null){
307-
dir = FSDirectory.open(indexDir);
308+
dir = FSDirectory.open(indexDir.toPath());
308309
Redwood.log(Redwood.DBG, "Updating lucene index at " + indexDir);
309310
indexWriter = new IndexWriter(dir, iwc);
310311
}}catch(IOException e){

src/edu/stanford/nlp/patterns/surface/PatternsForEachTokenLucene.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class PatternsForEachTokenLucene<E extends Pattern> extends PatternsForEa
3333

3434
static Analyzer analyzer = new KeywordAnalyzer();
3535

36-
static IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_42, analyzer);
36+
static IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
3737
static DirectoryReader reader = null;
3838
static IndexSearcher searcher;
3939

@@ -77,7 +77,7 @@ public PatternsForEachTokenLucene(Properties props, Map<String, Map<Integer, Set
7777

7878
public void checkClean(){
7979
try {
80-
dir = FSDirectory.open(indexDir);
80+
dir = FSDirectory.open(indexDir.toPath());
8181
CheckIndex checkIndex = new CheckIndex(dir);
8282
CheckIndex.Status status = checkIndex.checkIndex();
8383
assert (status.clean) : "index is not clean";
@@ -98,7 +98,7 @@ public void setupSearch(){
9898

9999
static synchronized void setIndexReaderSearcher() {
100100
try{
101-
FSDirectory index = NIOFSDirectory.open(indexDir);
101+
FSDirectory index = NIOFSDirectory.open(indexDir.toPath());
102102
if(reader == null){
103103
reader = DirectoryReader.open(index);
104104
searcher = new IndexSearcher(reader);
@@ -166,7 +166,7 @@ public void addPatterns(Map<String, Map<Integer, Set<E>>> pats) {
166166
static synchronized void setIndexWriter() {
167167
try{
168168
if(!openIndexWriter.get()){
169-
dir = FSDirectory.open(indexDir);
169+
dir = FSDirectory.open(indexDir.toPath());
170170
Redwood.log(Redwood.DBG, "Updating lucene index at " + indexDir);
171171
indexWriter = new IndexWriter(dir, iwc);
172172
openIndexWriter.set(true);

src/edu/stanford/nlp/pipeline/demo/corenlp-brat.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
<nav class="navbar navbar-default navbar-static-top">
2929
<div class="container">
3030
<div class="navbar-header">
31-
<a class="navbar-brand" href="https://nlp.stanford.edu/software/corenlp.shtml">Stanford CoreNLP 3.9.1 (updated 2018/04/05)</a>
31+
<a class="navbar-brand" href="https://stanfordnlp.github.io/CoreNLP/">Stanford CoreNLP 3.9.1 (updated 2018/04/05)</a>
3232
</div>
3333
</div>
3434
</nav>

src/edu/stanford/nlp/util/LuceneFieldType.java

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package edu.stanford.nlp.util;
22

33
import org.apache.lucene.document.FieldType;
4+
import org.apache.lucene.index.IndexOptions;
45

56
/** An easy way to access types of fields instead of setting variables up every time.
67
* Copied from KBPFieldType written by Angel.
@@ -24,34 +25,34 @@ public class LuceneFieldType {
2425
public static final FieldType NOT_INDEXED = new FieldType();
2526

2627
static {
27-
ANALYZED_NOT_STORED.setIndexed(true);
28+
ANALYZED_NOT_STORED.setIndexOptions(IndexOptions.DOCS);
2829
ANALYZED_NOT_STORED.setTokenized(true);
2930
ANALYZED_NOT_STORED.setStored(false);
3031
ANALYZED_NOT_STORED.setStoreTermVectors(true);
3132
ANALYZED_NOT_STORED.setStoreTermVectorPositions(true);
3233
ANALYZED_NOT_STORED.freeze();
3334

34-
ANALYZED.setIndexed(true);
35+
ANALYZED.setIndexOptions(IndexOptions.DOCS);
3536
ANALYZED.setTokenized(true);
3637
ANALYZED.setStored(true);
3738
ANALYZED.setStoreTermVectors(true);
3839
ANALYZED.setStoreTermVectorPositions(true);
3940
ANALYZED.freeze();
4041

41-
ANALYZED_NO_POSITION.setIndexed(true);
42+
ANALYZED_NO_POSITION.setIndexOptions(IndexOptions.DOCS);
4243
ANALYZED_NO_POSITION.setTokenized(true);
4344
ANALYZED_NO_POSITION.setStoreTermVectors(true);
4445
ANALYZED_NO_POSITION.setStoreTermVectorPositions(false);
4546
ANALYZED_NO_POSITION.freeze();
4647

47-
NOT_ANALYZED.setIndexed(true);
48+
NOT_ANALYZED.setIndexOptions(IndexOptions.DOCS);
4849
NOT_ANALYZED.setTokenized(false);
4950
NOT_ANALYZED.setStored(true);
5051
NOT_ANALYZED.setStoreTermVectors(false);
5152
NOT_ANALYZED.setStoreTermVectorPositions(false);
5253
NOT_ANALYZED.freeze();
5354

54-
NOT_INDEXED.setIndexed(false);
55+
NOT_INDEXED.setIndexOptions(IndexOptions.NONE);
5556
NOT_INDEXED.setTokenized(false);
5657
NOT_INDEXED.setStored(true);
5758
NOT_INDEXED.setStoreTermVectors(false);

0 commit comments

Comments
 (0)