1
1
import { Injectable } from '@angular/core' ;
2
2
import { FileService } from "../file-list/file.service" ;
3
- import { BehaviorSubject , concatMap , filter , find , from , last , map , mergeMap , Observable , of , tap , zip } from "rxjs" ;
3
+ import { BehaviorSubject , concatMap , filter , find , from , map , mergeMap , Observable , of , tap , zip } from "rxjs" ;
4
4
import { FileElement , isFileElement } from "../file-list/file-list.component" ;
5
5
import { Rule , RuleRepository } from "./rule.repository" ;
6
6
import { FilesCacheService } from "../files-cache/files-cache.service" ;
@@ -34,25 +34,42 @@ export class RuleService {
34
34
this . worker = new Worker ( new URL ( './rule.worker' , import . meta. url ) ) ;
35
35
const pdfWorkerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${ pdfjs . version } /pdf.worker.min.js` ;
36
36
pdfjs . GlobalWorkerOptions . workerSrc = pdfWorkerSrc ;
37
+
38
+ //TODO: mark each file to know which rules have already been run and when, then every time we load the page,
39
+ // we check for all pairs of rules/files which have not been run or are outdated
40
+ }
41
+
42
+ private static isRuleRunNeeded ( rules : Rule [ ] , file : FileElement ) {
43
+ for ( const rule of rules ) {
44
+ let previousFileRun = rule . fileRuns ?. find ( fileRun => fileRun . id === file . id ) ;
45
+ if ( previousFileRun && previousFileRun . value ) {
46
+ // We already know the matching rule
47
+ return false ;
48
+ }
49
+ if ( ! previousFileRun ) {
50
+ // There is a rule we need to run which has not been run before
51
+ return true ;
52
+ }
53
+ }
54
+ // All rules have already been run
55
+ return false ;
37
56
}
38
57
39
58
runAll ( ) : Observable < void > {
40
59
return from ( this . ruleRepository . findAll ( ) )
41
60
. pipe ( mergeMap ( rules => {
42
61
let fileOrFolders = this . filesCacheService . getAll ( )
43
62
// Get all files
44
- let files = fileOrFolders . filter ( file => isFileElement ( file ) )
45
- . map ( value => value as FileElement ) ;
63
+ let files = fileOrFolders
64
+ . filter ( ( file ) : file is FileElement => isFileElement ( file ) ) ;
65
+
46
66
47
67
// Run the script for each file to get the associated category
48
68
// The number of steps is one download per file plus one rule run per rule for each file
49
69
let stepAmount = files . length * ( 1 + rules . length ) ;
50
70
let progress = this . backgroundTaskService . showProgress ( 'Running all rules' , '' , stepAmount ) ;
51
- return this . computeFileToCategoryMap ( files , rules , progress )
52
- . pipe ( mergeMap ( fileToCategoryMap => {
53
- // Set the computed category for each files
54
- return this . setAllFileCategory ( fileToCategoryMap ) ;
55
- } ) , tap ( { complete : ( ) => progress . next ( { value : 100 , index : stepAmount } ) } ) )
71
+ return this . runAllAndSetCategories ( files , rules , progress )
72
+ . pipe ( tap ( { complete : ( ) => progress . next ( { value : 100 , index : stepAmount } ) } ) ) ;
56
73
} ) ) ;
57
74
}
58
75
@@ -73,63 +90,97 @@ export class RuleService {
73
90
}
74
91
75
92
/**
76
- * Run the given rules on the given files and return the associated category for each file that got a matching rule
93
+ * Run the given rules on the given files and set the associated category for each file that got a matching rule
77
94
*/
78
- private computeFileToCategoryMap ( files : FileElement [ ] , rules : Rule [ ] , progress : BehaviorSubject < Progress > ) {
79
- let fileToCategoryMap = new Map < FileElement , string [ ] > ( ) ;
95
+ private runAllAndSetCategories ( files : FileElement [ ] , rules : Rule [ ] , progress : BehaviorSubject < Progress > ) {
80
96
return zip ( from ( files )
81
97
. pipe ( concatMap ( ( file , fileIndex ) => {
82
98
let progressIndex = 1 + fileIndex * ( rules . length + 1 ) ;
83
-
84
- let fileContentObservable : Observable < string > ;
85
- if ( this . isFileContentReadable ( file ) ) {
86
- progress . next ( {
87
- index : progressIndex ,
88
- value : 0 ,
89
- description : "Downloading file content of '" + file . name + "'"
90
- } ) ;
91
- fileContentObservable = this . fileService . downloadFile ( file , progress )
92
- . pipe ( mergeMap ( blobContent => {
93
- if ( file . mimeType === 'application/pdf' ) {
94
- return fromPromise ( blobContent . arrayBuffer ( )
95
- . then ( arrayBuffer => pdfjs . getDocument ( arrayBuffer ) . promise )
96
- . then ( pdfDocument => pdfDocument . getPage ( 1 ) )
97
- . then ( firstPage => firstPage . getTextContent ( ) )
98
- . then ( textContent => textContent . items
99
- . filter ( ( item ) : item is TextItem => item !== undefined )
100
- . map ( item => "" + item . str ) . join ( ) ) ) ;
101
- } else {
102
- return fromPromise ( blobContent . text ( ) ) ;
103
- }
104
- } ) ) ;
105
- } else {
106
- fileContentObservable = of ( "" ) ;
99
+ if ( ! RuleService . isRuleRunNeeded ( rules , file ) ) {
100
+ return of ( undefined ) ;
107
101
}
108
- return fileContentObservable . pipe (
102
+ return this . getFileContent ( file , progress , progressIndex ) . pipe (
109
103
mergeMap ( fileContent => {
110
104
// Find the first rule which matches
111
- return from ( rules ) . pipe ( concatMap ( ( rule , ruleIndex ) => {
112
- progress . next ( {
113
- index : progressIndex + 1 + ruleIndex ,
114
- value : 0 ,
115
- description : "Running rule '" + rule . name + "' for '" + file . name + "'"
116
- } ) ;
117
- return this . run ( rule , file , fileContent , progress , progressIndex + 1 + ruleIndex ) ;
118
- } ) ,
119
- // Find will stop running further scripts once we got a match
120
- find ( result => {
121
- return result . value ;
122
- } ) ,
123
- map ( result => {
105
+ return this . runAllRules ( rules , progress , progressIndex , file , fileContent )
106
+ . pipe ( mergeMap ( result => {
107
+ // TODO: do the call to change the category immediately instead of constructing this map
108
+ // TODO: How to handle rules that have not run due to finding another matching rule? flag the matching files?
124
109
if ( result ) {
125
- fileToCategoryMap . set ( file , result . rule . category ) ;
110
+ return this . findOrCreateCategories ( Object . assign ( [ ] , result . rule . category ) , this . filesCacheService . getBaseFolder ( ) )
111
+ // There is no need to set the category if the current category is correct
112
+ . pipe ( filter ( categoryId => file . parentId !== categoryId ) ,
113
+ mergeMap ( categoryId => {
114
+ return this . fileService . setCategory ( file . id , categoryId ) ;
115
+ } ) ) ;
116
+ } else {
117
+ return of ( ) ;
126
118
}
127
119
} ) ) ;
128
120
} ) ) ;
129
121
} ) ) )
130
- . pipe ( last ( ) ,
131
- map ( ( ) => fileToCategoryMap ) ) ;
122
+ . pipe ( map ( ( ) => {
123
+ } ) ) ;
124
+
125
+ }
132
126
127
+ private runAllRules ( rulesToRun : Rule [ ] , progress : BehaviorSubject < Progress > , progressIndex : number , file : FileElement , fileContent : string ) {
128
+ return from ( rulesToRun ) . pipe ( concatMap ( ( rule , ruleIndex ) => {
129
+ let previousFileRun = rule . fileRuns ?. find ( fileRun => fileRun . id === file . id ) ;
130
+ if ( previousFileRun ) {
131
+ // The rule was run previously, so we already know the result
132
+ let result : RuleResult = {
133
+ rule : rule ,
134
+ value : previousFileRun . value
135
+ } ;
136
+ return of ( result ) ;
137
+ }
138
+ progress . next ( {
139
+ index : progressIndex + 1 + ruleIndex ,
140
+ value : 0 ,
141
+ description : "Running rule '" + rule . name + "' for '" + file . name + "'"
142
+ } ) ;
143
+ return this . run ( rule , file , fileContent , progress , progressIndex + 1 + ruleIndex )
144
+ . pipe ( tap ( ( result ) => {
145
+ // Add this file run to the rule's fileRuns to avoid doing the same run again
146
+ let rule = result . rule ;
147
+ if ( ! rule . fileRuns ) {
148
+ rule . fileRuns = [ ] ;
149
+ }
150
+ rule . fileRuns . push ( { id : file . id , value : result . value } ) ;
151
+ this . ruleRepository . update ( rule ) ;
152
+ } ) ) ;
153
+ } ) ,
154
+ // Find will stop running further scripts once we get a match
155
+ find ( result => {
156
+ return result . value ;
157
+ } ) ) ;
158
+ }
159
+
160
+ private getFileContent ( file : FileElement , progress : BehaviorSubject < Progress > , progressIndex : number ) {
161
+ if ( this . isFileContentReadable ( file ) ) {
162
+ progress . next ( {
163
+ index : progressIndex ,
164
+ value : 0 ,
165
+ description : "Downloading file content of '" + file . name + "'"
166
+ } ) ;
167
+ return this . fileService . downloadFile ( file , progress )
168
+ . pipe ( mergeMap ( blobContent => {
169
+ if ( file . mimeType === 'application/pdf' ) {
170
+ return fromPromise ( blobContent . arrayBuffer ( )
171
+ . then ( arrayBuffer => pdfjs . getDocument ( arrayBuffer ) . promise )
172
+ . then ( pdfDocument => pdfDocument . getPage ( 1 ) )
173
+ . then ( firstPage => firstPage . getTextContent ( ) )
174
+ . then ( textContent => textContent . items
175
+ . filter ( ( item ) : item is TextItem => item !== undefined )
176
+ . map ( item => "" + item . str ) . join ( ) ) ) ;
177
+ } else {
178
+ return fromPromise ( blobContent . text ( ) ) ;
179
+ }
180
+ } ) ) ;
181
+ } else {
182
+ return of ( "" ) ;
183
+ }
133
184
}
134
185
135
186
private isFileContentReadable ( file : FileElement ) {
@@ -151,27 +202,7 @@ export class RuleService {
151
202
} ) ;
152
203
}
153
204
154
- /**
155
- * Find or create the categories for each file and associate them
156
- */
157
- private setAllFileCategory ( fileToCategoryMap : Map < FileElement , string [ ] > ) : Observable < void > {
158
- let baseFolderId = this . filesCacheService . getBaseFolder ( ) ;
159
- let categoryRequests : Observable < void > [ ] = [ ] ;
160
- fileToCategoryMap
161
- . forEach ( ( category , file ) => {
162
- categoryRequests . push ( this . findOrCreateCategories ( category , baseFolderId )
163
- // There is no need to set the category if the current category is correct
164
- . pipe ( filter ( categoryId => file . parentId !== categoryId ) ,
165
- mergeMap ( categoryId => {
166
- return this . fileService . setCategory ( file . id , categoryId ) ;
167
- } ) ) ) ;
168
- } ) ;
169
- return zip ( categoryRequests ) . pipe ( map ( ( ) => {
170
- } ) ) ;
171
- }
172
-
173
205
// TODO: move and refactor duplicate to FileService
174
-
175
206
private findOrCreateCategories ( categories : string [ ] , categoryId : string ) : Observable < string > {
176
207
let categoryName = categories . shift ( ) ;
177
208
if ( categoryName !== undefined ) {
0 commit comments