-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUnitBlockSorter.pas
160 lines (139 loc) · 4.35 KB
/
UnitBlockSorter.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
unit UnitBlockSorter;
interface
uses
SysUtils, Classes,
Common, UnitThreadManager, UnitBufferedFileStream;
type
TBlockSorterThread = class(TProtoThread)
private
type
TIndexArray = array[0..0] of integer;
PIndexArray = ^TIndexArray;
var
FData: PAnsiChar;
FDataSize: integer;
FIndexes: PIndexArray; // ìàññèâ ñìåùåíèé ñòðîê îò íà÷àëà áóôåðà. õðàíèì íå â âèäå óêàçàòåëåé, à â âèäå ñìåùåíèé, ò.ê. ïðè íåîáõîäèìîñòè
// ñêîìïèëèðîâàòü ïîä x64 ìàññèâ ñ óêàçàòåëÿìè áóäåò çàíèìàòü â äâà ðàçà áîëüøå ïàìÿòè, êîòîðàÿ ó íàñ îãðàíè÷åíà
procedure QuickSort(L, R: Integer);
public
constructor Create(const Data: PAnsiChar; const DataSize: integer; const BlockIndex: integer);
procedure Execute; override;
end;
implementation
procedure TBlockSorterThread.QuickSort(L, R: Integer);
var
I, J, P, Save: Integer;
begin
repeat
I := L;
J := R;
P := (L + R) shr 1;
repeat
while CompareStrings(FData + FIndexes[I], FData + FIndexes[P]) < 0 do
Inc(I);
while CompareStrings(FData + FIndexes[J], FData + FIndexes[P]) > 0 do
Dec(J);
if I <= J then
begin
Save := FIndexes[I];
FIndexes[I] := FIndexes[J];
FIndexes[J] := Save;
if P = I then
P := J
else
if P = J then
P := I;
Inc(I);
Dec(J);
end;
until I > J;
if L < J then QuickSort(L, J);
L := I;
until I >= R;
end;
{ TStreamSorter }
constructor TBlockSorterThread.Create(const Data: PAnsiChar; const DataSize: integer; const BlockIndex: integer);
begin
inherited Create(BlockIndex);
FData := Data;
FDataSize := DataSize;
end;
procedure TBlockSorterThread.Execute;
var
LineStart, Ptr, DataEnd: PAnsiChar;
LineCount: integer;
IndexesSize: integer; // òåêóùèé ðàçìåð ìàññèâà ñìåùåíèé ñòðîê
FS: TStream;
i: integer;
IndexBlockSize: integer;
LastCRLFAdded: boolean;
begin
IndexesSize := 0;
IndexBlockSize := MergeBufferSize div (MAX_LINE_LENGTH div 2) * SizeOf(Integer); // ïðèðàùåíèå ðàçìåðà ìàññèâà èíäåêñîâ âûáåðåì êàê ñðåäíåå êîëè÷åñòâî ñòðîê, ïîìåùàþùååñÿ â áóôåð
LastCRLFAdded := false;
FIndexes := nil;
try
try
// ïîäãîòîâèì ìàññèâ FIndexes ñî ñìåùåíèÿìè êàæäîé ñòðîêè îò íà÷àëà áóôåðà
// Ïîñêîëüêó êîëè÷åñòâî ñòðîê çàðàíåå íå èçâåñòíî, òî ïàìÿòü ïîä ìàññèâ âûäåëÿåì ïîðöèÿìè ïî INDEX_BLOCK_SIZE áàéò
DataEnd := FData + FDataSize;
Ptr := FData;
LineCount := 0;
while Ptr < DataEnd do
begin
LineStart := Ptr;
while (Ptr < DataEnd) and (PWord(Ptr)^ <> CRLF) do
Inc(Ptr);
if Ptr = DataEnd then
begin
// äîøëè äî êîíöà áóôåðà è òàì íå áûëî CRLF - äîáàâèì åãî è çàïîìíèì ýòîò ôàêò, ÷òîáû ïîòîì óäàëèòü
// Äîáàâëÿòü ìîæíî, ò.ê. â UnitSplitter ìû âûäåëÿëè ïîä áóôôåð äâà ëèøíèõ áàéòà
PWord(DataEnd)^ := CRLF;
Inc(DataEnd, 2);
LastCRLFAdded := true;
end;
// ñäâèíåì óêàçàòåëü íà äâà ñèìâîëà CR è ÄÀ
Inc(Ptr, 2);
// óâåëè÷èì ðàçìåð ìàññèâîâ èíäåêñîâ è äëèí ñòðîê åñëè íàäî
if LineCount * SizeOf(integer) >= IndexesSize then
begin
Inc(IndexesSize, IndexBlockSize);
ReallocMem(FIndexes, IndexesSize);
end;
FIndexes[LineCount] := LineStart - FData;
Inc(LineCount);
end;
// Ñîðòèðóåì ñòðîêè îáû÷íûì QuickSort-îì. Ðåàëüíî ñîðòèðóþòñÿ ñìåùåíèÿ â ìàññèâå
QuickSort(0, LineCount - 1);
FS := nil; // ÷òîá êîìïèëÿòîð íå ðóãàëñÿ íà íåèíèöèàëèçèðîâàííóþ ïåðåìåííóþ
try
FS := TWriteCachedFileStream.Create(ResultFileName, FDataSize, 0, false);
except
writeln('Cannot create file ', ResultFileName);
Halt;
end;
// ñîõðàíÿåì îòñîðòèðîâàííûå ñòðîêè â ôàéë
try
for i := 0 to LineCount - 1 do
begin
Ptr := FData + FIndexes[i];
LineStart := Ptr;
while (Ptr < DataEnd) and (PWord(Ptr)^ <> CRLF) do
Inc(Ptr);
// Åñëè ýòî áûëà ïîñëåäíÿÿ ñòðîêà è áûë äîáàâëåí CRLF - åãî ïèñàòü íå íàäî
if (i < LineCount - 1) or not LastCRLFAdded then
Inc(Ptr, 2); // CRLF
FS.WriteBuffer(LineStart^, Ptr - LineStart);
end;
finally
FS.Free;
end;
finally
FreeMem(FData);
end;
finally
FreeMem(FIndexes);
end;
inherited;
end;
end.