Skip to content

Commit c2d29b4

Browse files
committed
Merge branch 'hotfix-1.30.6'
2 parents 8fddda7 + c1878a9 commit c2d29b4

File tree

14 files changed

+122
-104
lines changed

14 files changed

+122
-104
lines changed

gemma-cli/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<artifactId>gemma</artifactId>
55
<groupId>gemma</groupId>
6-
<version>1.30.5</version>
6+
<version>1.30.6</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99
<artifactId>gemma-cli</artifactId>

gemma-core/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<parent>
44
<artifactId>gemma</artifactId>
55
<groupId>gemma</groupId>
6-
<version>1.30.5</version>
6+
<version>1.30.6</version>
77
</parent>
88
<modelVersion>4.0.0</modelVersion>
99
<artifactId>gemma-core</artifactId>

gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/EmptyMinimlDocumentException.java

-10
This file was deleted.

gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/GeoBrowser.java

+63-30
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,12 @@ public Collection<GeoRecord> getGeoRecords( Collection<String> accessions ) thro
174174

175175
private void getGeoBasicRecords( List<GeoRecord> records, String searchUrlString ) throws IOException {
176176
URL searchUrl = new URL( searchUrlString );
177-
Document searchDocument = parseMiniMLDocument( searchUrl );
177+
Document searchDocument;
178+
try {
179+
searchDocument = parseMiniMLDocument( searchUrl );
180+
} catch ( EmptyXmlDocumentException e ) {
181+
throw new RuntimeException( "Empty MINiML document for " + searchUrl, e );
182+
}
178183

179184
NodeList countNode = searchDocument.getElementsByTagName( "Count" );
180185
Node countEl = countNode.item( 0 );
@@ -201,8 +206,8 @@ private void getGeoBasicRecords( List<GeoRecord> records, String searchUrlString
201206
t.start();
202207

203208
NodeList accNodes, titleNodes, sampleNodes, dateNodes, orgnNodes, platformNodes, summaryNodes, typeNodes, pubmedNodes;
204-
Document summaryDocument = parseMiniMLDocument( fetchUrl );
205209
try {
210+
Document summaryDocument = parseMiniMLDocument( fetchUrl );
206211
accNodes = ( NodeList ) xaccession.evaluate( summaryDocument, XPathConstants.NODESET );
207212
titleNodes = ( NodeList ) xtitle.evaluate( summaryDocument, XPathConstants.NODESET );
208213
sampleNodes = ( NodeList ) xnumSamples.evaluate( summaryDocument, XPathConstants.NODESET );
@@ -212,6 +217,8 @@ private void getGeoBasicRecords( List<GeoRecord> records, String searchUrlString
212217
summaryNodes = ( NodeList ) xsummary.evaluate( summaryDocument, XPathConstants.NODESET );
213218
typeNodes = ( NodeList ) xtype.evaluate( summaryDocument, XPathConstants.NODESET );
214219
pubmedNodes = ( NodeList ) xpubmed.evaluate( summaryDocument, XPathConstants.NODESET );
220+
} catch ( EmptyXmlDocumentException e ) {
221+
throw new RuntimeException( "Empty MINiML document for " + fetchUrl, e );
215222
} catch ( XPathExpressionException e ) {
216223
throw new RuntimeException( String.format( "Failed to parse XML for %s", fetchUrl ), e );
217224
}
@@ -282,7 +289,12 @@ public Collection<GeoRecord> getAllGEOPlatforms() throws IOException {
282289
}
283290

284291
URL searchUrl = new URL( searchUrlString );
285-
Document searchDocument = parseMiniMLDocument( searchUrl );
292+
Document searchDocument;
293+
try {
294+
searchDocument = parseMiniMLDocument( searchUrl );
295+
} catch ( EmptyXmlDocumentException e ) {
296+
throw new RuntimeException( "Got an empty MINiML document for " + searchUrl, e );
297+
}
286298

287299
NodeList countNode = searchDocument.getElementsByTagName( "Count" );
288300
Node countEl = countNode.item( 0 );
@@ -309,18 +321,19 @@ public Collection<GeoRecord> getAllGEOPlatforms() throws IOException {
309321
StopWatch t = new StopWatch();
310322
t.start();
311323

312-
Document summaryDocument = parseMiniMLDocument( fetchUrl );
313324
NodeList accNodes, titleNodes, dateNodes, orgnNodes, summaryNodes, techNodes;
314325
try {
326+
Document summaryDocument = parseMiniMLDocument( fetchUrl );
315327
accNodes = ( NodeList ) xPlataccession.evaluate( summaryDocument, XPathConstants.NODESET );
316328
titleNodes = ( NodeList ) xtitle.evaluate( summaryDocument, XPathConstants.NODESET );
317329
summaryNodes = ( NodeList ) xsummary.evaluate( summaryDocument, XPathConstants.NODESET );
318330
techNodes = ( NodeList ) xPlatformTech.evaluate( summaryDocument, XPathConstants.NODESET );
319331
orgnNodes = ( NodeList ) xorganisms.evaluate( summaryDocument, XPathConstants.NODESET );
320332
dateNodes = ( NodeList ) xreleaseDate.evaluate( summaryDocument, XPathConstants.NODESET );
333+
} catch ( EmptyXmlDocumentException e ) {
334+
throw new RuntimeException( "Got an empty MINiML document for " + fetchUrl, e );
321335
} catch ( XPathExpressionException e ) {
322-
log.error( "Could not parse data: " + searchUrl, e );
323-
return Collections.emptyList();
336+
throw new RuntimeException( "Could not parse data for " + searchUrl, e );
324337
}
325338

326339
// consider n_samples (number of elements) and the number of GSEs, but not every record has them, so it would be trickier.
@@ -352,8 +365,8 @@ public Collection<GeoRecord> getAllGEOPlatforms() throws IOException {
352365

353366
/**
354367
* Provides more details than getRecentGeoRecords. Performs an E-utilities query of the GEO database with the given
355-
* searchTerms (search terms can be ommitted). Returns at most pageSize records. Does some screening of results for
356-
* expression studies, and (optionally) taxa. This is used for identifying data sets for loading
368+
* searchTerms (search terms can be omitted). Returns at most pageSize records. Does some screening of results for
369+
* expression studies, and (optionally) taxa. This is used for identifying data sets for loading.
357370
*
358371
* @param start offset at which to start retrieving batches
359372
* @param pageSize page size, i.e. how many records to retrieve
@@ -392,7 +405,12 @@ public List<GeoRecord> getGeoRecordsBySearchTerm( String searchTerms, int start,
392405
}
393406

394407
URL searchUrl = new URL( searchUrlString );
395-
Document searchDocument = parseMiniMLDocument( searchUrl );
408+
Document searchDocument;
409+
try {
410+
searchDocument = parseMiniMLDocument( searchUrl );
411+
} catch ( EmptyXmlDocumentException e ) {
412+
throw new RuntimeException( "Got an empty MINiML document for " + searchUrl, e );
413+
}
396414

397415
NodeList countNode = searchDocument.getElementsByTagName( "Count" );
398416
Node countEl = countNode.item( 0 );
@@ -422,9 +440,9 @@ public List<GeoRecord> getGeoRecordsBySearchTerm( String searchTerms, int start,
422440
t.start();
423441
int rawRecords = 0;
424442

425-
Document summaryDocument = parseMiniMLDocument( fetchUrl );
426443
NodeList accNodes, titleNodes, sampleNodes, dateNodes, orgnNodes, platformNodes, summaryNodes, typeNodes, pubmedNodes;
427444
try {
445+
Document summaryDocument = parseMiniMLDocument( fetchUrl );
428446
accNodes = ( NodeList ) xaccession.evaluate( summaryDocument, XPathConstants.NODESET );
429447
titleNodes = ( NodeList ) xtitle.evaluate( summaryDocument, XPathConstants.NODESET );
430448
sampleNodes = ( NodeList ) xnumSamples.evaluate( summaryDocument, XPathConstants.NODESET );
@@ -435,6 +453,8 @@ public List<GeoRecord> getGeoRecordsBySearchTerm( String searchTerms, int start,
435453
typeNodes = ( NodeList ) xtype.evaluate( summaryDocument, XPathConstants.NODESET );
436454
pubmedNodes = ( NodeList ) xpubmed.evaluate( summaryDocument, XPathConstants.NODESET );
437455
// NodeList sampleLists = ( NodeList ) xsamples.evaluate( summaryDocument, XPathConstants.NODESET );
456+
} catch ( EmptyXmlDocumentException e ) {
457+
throw new RuntimeException( "Got an empty MINiML document for " + fetchUrl, e );
438458
} catch ( XPathExpressionException e ) {
439459
throw new RuntimeException( String.format( "Failed to parse XML for %s", searchUrl ), e );
440460
}
@@ -610,7 +630,7 @@ public List<GeoRecord> getRecentGeoRecords( int startPage, int pageSize ) throws
610630
* exposed for testing
611631
*
612632
*/
613-
void parseMINiML( GeoRecord record, Document detailsDocument ) throws IOException {
633+
void parseMINiML( GeoRecord record, Document detailsDocument ) {
614634
// e.g. https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE180363&targ=gse&form=xml&view=full
615635
NodeList relTypeNodes;
616636
String overallDesign;
@@ -706,15 +726,15 @@ private void getDetails( GeoRecord record ) {
706726
*/
707727
try {
708728
parseMINiML( record, parseMiniMLDocument( miniMLURL ) );
709-
} catch ( IOException e ) {
729+
} catch ( EmptyXmlDocumentException | IOException e ) {
710730
log.error( e.getMessage() + " while processing MINiML for " + record.getGeoAccession()
711731
+ ", subseries status will not be determined." );
712732
}
713733
}
714734

715735
try {
716736
getSampleDetails( record );
717-
} catch ( EmptyMinimlDocumentException | IOException e ) {
737+
} catch ( EmptyXmlDocumentException | IOException e ) {
718738
log.error( e.getMessage() + " while processing MINiML for " + record.getGeoAccession()
719739
+ ", sample details will not be obtained" );
720740
}
@@ -761,7 +781,7 @@ private void getMeshHeadings( GeoRecord record ) throws IOException {
761781
* Fetch and parse MINiML for samples.
762782
*
763783
*/
764-
private void getSampleDetails( GeoRecord record ) throws IOException {
784+
private void getSampleDetails( GeoRecord record ) throws EmptyXmlDocumentException, IOException {
765785
// Fetch miniML for the samples.
766786
// e.g. https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE171682&targ=gsm&form=xml&view=full
767787
URL sampleMINIMLURL = new URL( "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?targ=gsm&form=xml&view=full&acc=" + urlEncode( record.getGeoAccession() ) );
@@ -806,18 +826,16 @@ private String urlEncode( String s ) {
806826
* @throws IOException if there is a problem while manipulating the file or if the number of records in the document
807827
* exceeds {@link #MAX_MINIML_RECORD_SIZE}
808828
*/
809-
Document parseMiniMLDocument( URL url ) throws IOException {
829+
Document parseMiniMLDocument( URL url ) throws EmptyXmlDocumentException, IOException {
810830
return parseMiniMLDocument( url, MAX_RETRIES, null );
811831
}
812832

813-
private Document parseMiniMLDocument( URL url, int maxRetries, @Nullable IOExceptionWithRetry errorFromPreviousAttempt ) throws IOException {
833+
private Document parseMiniMLDocument( URL url, int maxRetries, @Nullable IOExceptionWithRetry errorFromPreviousAttempt ) throws EmptyXmlDocumentException, IOException {
814834
try ( InputStream is = openUrlWithMaxSize( url, MAX_MINIML_RECORD_SIZE ) ) {
815835
return GeoBrowser.docFactory.newDocumentBuilder().parse( is );
816836
} catch ( ParserConfigurationException | SAXException e ) {
817-
if ( isCausedByAnEmptyMinimlDocument( e ) ) {
818-
throw new EmptyMinimlDocumentException( e );
819-
} else if ( isLikelyCausedByAPrivateGeoRecord( e ) ) {
820-
throw new LikelyNonPublicGeoRecordException( e );
837+
if ( isCausedByAnEmptyXmlDocument( e ) ) {
838+
throw new EmptyXmlDocumentException( e );
821839
} else {
822840
throw new RuntimeException( String.format( "Failed to parse MINiML from URL %s", url ), e );
823841
}
@@ -840,20 +858,20 @@ private Document parseMiniMLDocument( URL url, int maxRetries, @Nullable IOExcep
840858
}
841859
}
842860

861+
/**
862+
* Check if the given exception is eligible for being retried.
863+
* <p>
864+
* For now, just exclude inputs that are too large from being reattempted.
865+
*/
843866
private boolean isEligibleForRetry( IOException e ) {
844-
return !ExceptionUtils.hasCause( e, MinimlDocumentTooLargeException.class );
845-
}
846-
847-
private boolean isCausedByAnEmptyMinimlDocument( Exception e ) {
848-
return e instanceof SAXParseException && e.getMessage().contains( "Premature end of file." );
867+
return !ExceptionUtils.hasCause( e, InputTooLargeException.class );
849868
}
850869

851870
/**
852-
* GEO delivers an HTML document for non-public datasets
853-
* it's possible for this specific case because we're not querying a dataset in particular
871+
* Check if an exception is caused by an empty MINiML document.
854872
*/
855-
private boolean isLikelyCausedByAPrivateGeoRecord( Exception e ) {
856-
return e instanceof SAXParseException && e.getMessage().contains( "White spaces are required between publicId and systemId" );
873+
private boolean isCausedByAnEmptyXmlDocument( Exception e ) {
874+
return e instanceof SAXParseException && e.getMessage().contains( "Premature end of file." );
857875
}
858876

859877
/**
@@ -867,8 +885,23 @@ private InputStream openUrlWithMaxSize( URL url, long maxSize ) throws IOExcepti
867885
return new LimitedInputStream( inputStream, maxSize ) {
868886
@Override
869887
protected void raiseError( long pSizeMax, long pCount ) throws IOException {
870-
throw new MinimlDocumentTooLargeException( String.format( "Document exceeds %d B.", maxSize ) );
888+
throw new InputTooLargeException( String.format( "Document exceeds %d B.", maxSize ) );
871889
}
872890
};
873891
}
892+
893+
private static class InputTooLargeException extends IOException {
894+
public InputTooLargeException( String message ) {
895+
super( message );
896+
}
897+
}
898+
899+
/**
900+
* Exception raised when an empty XML document is encountered.
901+
*/
902+
private static class EmptyXmlDocumentException extends Exception {
903+
public EmptyXmlDocumentException( Throwable cause ) {
904+
super( "The XML document was empty", cause );
905+
}
906+
}
874907
}

gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/LikelyNonPublicGeoRecordException.java

-7
This file was deleted.

gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/MinimlDocumentTooLargeException.java

-9
This file was deleted.

gemma-core/src/main/java/ubic/gemma/model/genome/biosequence/SequenceType.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,10 @@ public enum SequenceType {
3030
/**
3131
* Represents a (synthetic) oligonucleotide.
3232
*/
33-
OLIGO;
33+
OLIGO,
34+
35+
/**
36+
* A placeholder element used for annotation associations for RNA-seq.
37+
*/
38+
DUMMY;
3439
}

0 commit comments

Comments
 (0)