Skip to content

Commit c12755b

Browse files
committed
feat: add missing header writer to write the missing header file with deduplicated headers
1 parent d94011e commit c12755b

File tree

6 files changed

+277
-19
lines changed

6 files changed

+277
-19
lines changed

rollup/missing_header_fields/export-headers-toolkit/cmd/dedup.go

+36-16
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ The binary layout of the deduplicated file is as follows:
3939
log.Fatalf("Error reading output flag: %v", err)
4040
}
4141

42-
runDedup(inputFile, outputFile)
42+
seenDifficulty, seenVanity, seenSealLen := runAnalysis(inputFile)
43+
runDedup(inputFile, outputFile, seenDifficulty, seenVanity, seenSealLen)
4344
},
4445
}
4546

@@ -50,38 +51,57 @@ func init() {
5051
dedupCmd.Flags().String("output", "headers-dedup.bin", "deduplicated, binary formatted file")
5152
}
5253

53-
func runDedup(inputFile, outputFile string) {
54+
func runAnalysis(inputFile string) (seenDifficulty map[uint64]int, seenVanity map[[32]byte]bool, seenSealLen map[int]int) {
5455
reader := newHeaderReader(inputFile)
5556
defer reader.close()
5657

5758
// track header fields we've seen
58-
seenDifficulty := make(map[uint64]bool)
59-
seenVanity := make(map[[32]byte]bool)
60-
seenSealLen := make(map[int]bool)
59+
seenDifficulty = make(map[uint64]int)
60+
seenVanity = make(map[[32]byte]bool)
61+
seenSealLen = make(map[int]int)
6162

6263
reader.read(func(header *types.Header) {
63-
seenDifficulty[header.Difficulty] = true
64+
seenDifficulty[header.Difficulty]++
6465
seenVanity[header.Vanity()] = true
65-
seenSealLen[header.SealLen()] = true
66+
seenSealLen[header.SealLen()]++
6667
})
6768

68-
// Print report
69+
// Print distinct values and report
6970
fmt.Println("--------------------------------------------------")
70-
fmt.Printf("Unique values seen in the headers file (last seen block: %d):\n", reader.lastHeader.Number)
71-
fmt.Printf("Distinct count: Difficulty:%d, Vanity:%d, SealLen:%d\n", len(seenDifficulty), len(seenVanity), len(seenSealLen))
72-
fmt.Printf("--------------------------------------------------\n\n")
73-
74-
for diff := range seenDifficulty {
75-
fmt.Printf("Difficulty: %d\n", diff)
71+
for diff, count := range seenDifficulty {
72+
fmt.Printf("Difficulty %d: %d\n", diff, count)
7673
}
7774

7875
for vanity := range seenVanity {
7976
fmt.Printf("Vanity: %x\n", vanity)
8077
}
8178

82-
for sealLen := range seenSealLen {
83-
fmt.Printf("SealLen: %d\n", sealLen)
79+
for sealLen, count := range seenSealLen {
80+
fmt.Printf("SealLen %d bytes: %d\n", sealLen, count)
8481
}
82+
83+
fmt.Println("--------------------------------------------------")
84+
fmt.Printf("Unique values seen in the headers file (last seen block: %d):\n", reader.lastHeader.Number)
85+
fmt.Printf("Distinct count: Difficulty:%d, Vanity:%d, SealLen:%d\n", len(seenDifficulty), len(seenVanity), len(seenSealLen))
86+
fmt.Printf("--------------------------------------------------\n\n")
87+
88+
return seenDifficulty, seenVanity, seenSealLen
89+
}
90+
91+
func runDedup(inputFile, outputFile string, seenDifficulty map[uint64]int, seenVanity map[[32]byte]bool, seenSealLen map[int]int) {
92+
reader := newHeaderReader(inputFile)
93+
defer reader.close()
94+
95+
writer := newMissingHeaderFileWriter(outputFile, seenVanity)
96+
writer.close()
97+
98+
writer.missingHeaderWriter.writeVanities()
99+
100+
reader.read(func(header *types.Header) {
101+
writer.missingHeaderWriter.write(header)
102+
})
103+
104+
fmt.Printf("Deduplicated headers written to %s\n", outputFile)
85105
}
86106

87107
type headerReader struct {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package cmd
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"io"
7+
"log"
8+
"os"
9+
"sort"
10+
11+
"github.com/scroll-tech/go-ethereum/export-headers-toolkit/types"
12+
)
13+
14+
type missingHeaderFileWriter struct {
15+
file *os.File
16+
writer *bufio.Writer
17+
18+
missingHeaderWriter *missingHeaderWriter
19+
}
20+
21+
func newMissingHeaderFileWriter(filename string, seenVanity map[[32]byte]bool) *missingHeaderFileWriter {
22+
file, err := os.Create(filename)
23+
if err != nil {
24+
log.Fatalf("Error creating file: %v", err)
25+
}
26+
27+
writer := bufio.NewWriter(file)
28+
return &missingHeaderFileWriter{
29+
file: file,
30+
writer: writer,
31+
missingHeaderWriter: newMissingHeaderWriter(writer, seenVanity),
32+
}
33+
}
34+
35+
func (m *missingHeaderFileWriter) close() {
36+
if err := m.writer.Flush(); err != nil {
37+
log.Fatalf("Error flushing writer: %v", err)
38+
}
39+
if err := m.file.Close(); err != nil {
40+
log.Fatalf("Error closing file: %v", err)
41+
}
42+
}
43+
44+
// missingHeaderWriter encodes the vanity table and individual headers into an
// io.Writer.
type missingHeaderWriter struct {
	// writer is the destination of all encoded bytes.
	writer io.Writer

	// sortedVanities holds the distinct vanities in ascending byte order.
	sortedVanities [][32]byte
	// sortedVanitiesMap maps a vanity to its position in sortedVanities.
	sortedVanitiesMap map[[32]byte]int
	// seenDifficulty and seenSealLen are not populated by
	// newMissingHeaderWriter and are not read by the writer's methods.
	seenDifficulty map[uint64]int
	seenSealLen    map[int]int
}
52+
53+
func newMissingHeaderWriter(writer io.Writer, seenVanity map[[32]byte]bool) *missingHeaderWriter {
54+
// sort the vanities and assign an index to each so that we can write the index of the vanity in the header
55+
sortedVanities := make([][32]byte, 0, len(seenVanity))
56+
for vanity := range seenVanity {
57+
sortedVanities = append(sortedVanities, vanity)
58+
}
59+
sort.Slice(sortedVanities, func(i, j int) bool {
60+
return bytes.Compare(sortedVanities[i][:], sortedVanities[j][:]) < 0
61+
})
62+
sortedVanitiesMap := make(map[[32]byte]int)
63+
for i, vanity := range sortedVanities {
64+
sortedVanitiesMap[vanity] = i
65+
}
66+
67+
return &missingHeaderWriter{
68+
writer: writer,
69+
sortedVanities: sortedVanities,
70+
sortedVanitiesMap: sortedVanitiesMap,
71+
}
72+
}
73+
74+
func (m *missingHeaderWriter) writeVanities() {
75+
// write the count of unique vanities
76+
if _, err := m.writer.Write([]byte{uint8(len(m.sortedVanitiesMap))}); err != nil {
77+
log.Fatalf("Error writing unique vanity count: %v", err)
78+
}
79+
80+
// write the unique vanities
81+
for _, vanity := range m.sortedVanities {
82+
if _, err := m.writer.Write(vanity[:]); err != nil {
83+
log.Fatalf("Error writing vanity: %v", err)
84+
}
85+
}
86+
}
87+
88+
func (m *missingHeaderWriter) write(header *types.Header) {
89+
// 1. write the index of the vanity in the unique vanity list
90+
if _, err := m.writer.Write([]byte{uint8(m.sortedVanitiesMap[header.Vanity()])}); err != nil {
91+
log.Fatalf("Error writing vanity index: %v", err)
92+
}
93+
94+
// 2. write the bitmask
95+
// - bit 0: 0 if difficulty is 2, 1 if difficulty is 1
96+
// - bit 1: 0 if seal length is 65, 1 if seal length is 85
97+
// - rest: 0
98+
bitmask := uint8(0)
99+
if header.Difficulty == 1 {
100+
bitmask |= 1 << 0
101+
}
102+
if header.SealLen() == 85 {
103+
bitmask |= 1 << 1
104+
}
105+
106+
if _, err := m.writer.Write([]byte{bitmask}); err != nil {
107+
log.Fatalf("Error writing bitmask: %v", err)
108+
}
109+
110+
if _, err := m.writer.Write(header.Seal()); err != nil {
111+
log.Fatalf("Error writing seal: %v", err)
112+
}
113+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package cmd
2+
3+
import (
4+
"bytes"
5+
"crypto/rand"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
10+
"github.com/scroll-tech/go-ethereum/export-headers-toolkit/types"
11+
)
12+
13+
func TestMissingHeaderWriter(t *testing.T) {
14+
vanity1 := [32]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
15+
vanity2 := [32]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02}
16+
vanity8 := [32]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08}
17+
18+
var expectedBytes []byte
19+
expectedBytes = append(expectedBytes, 0x03)
20+
expectedBytes = append(expectedBytes, vanity1[:]...)
21+
expectedBytes = append(expectedBytes, vanity2[:]...)
22+
expectedBytes = append(expectedBytes, vanity8[:]...)
23+
24+
seenVanity := map[[32]byte]bool{
25+
vanity8: true,
26+
vanity1: true,
27+
vanity2: true,
28+
}
29+
var buf []byte
30+
bytesBuffer := bytes.NewBuffer(buf)
31+
mhw := newMissingHeaderWriter(bytesBuffer, seenVanity)
32+
33+
mhw.writeVanities()
34+
assert.Equal(t, expectedBytes, bytesBuffer.Bytes())
35+
36+
// Header0
37+
{
38+
seal := randomSeal(65)
39+
header := types.NewHeader(0, 2, append(vanity1[:], seal...))
40+
mhw.write(header)
41+
42+
expectedBytes = append(expectedBytes, 0x00) // index 0
43+
expectedBytes = append(expectedBytes, 0x00) // difficulty 2, seal length 65
44+
expectedBytes = append(expectedBytes, seal...)
45+
assert.Equal(t, expectedBytes, bytesBuffer.Bytes())
46+
}
47+
48+
// Header1
49+
{
50+
seal := randomSeal(65)
51+
header := types.NewHeader(1, 1, append(vanity2[:], seal...))
52+
mhw.write(header)
53+
54+
expectedBytes = append(expectedBytes, 0x01) // index 1
55+
expectedBytes = append(expectedBytes, 0x01) // difficulty 1, seal length 65
56+
expectedBytes = append(expectedBytes, seal...)
57+
assert.Equal(t, expectedBytes, bytesBuffer.Bytes())
58+
}
59+
60+
// Header2
61+
{
62+
seal := randomSeal(85)
63+
header := types.NewHeader(2, 2, append(vanity2[:], seal...))
64+
mhw.write(header)
65+
66+
expectedBytes = append(expectedBytes, 0x01) // index 1
67+
expectedBytes = append(expectedBytes, 0x02) // difficulty 2, seal length 85
68+
expectedBytes = append(expectedBytes, seal...)
69+
assert.Equal(t, expectedBytes, bytesBuffer.Bytes())
70+
}
71+
72+
// Header3
73+
{
74+
seal := randomSeal(85)
75+
header := types.NewHeader(3, 1, append(vanity8[:], seal...))
76+
mhw.write(header)
77+
78+
expectedBytes = append(expectedBytes, 0x02) // index 2
79+
expectedBytes = append(expectedBytes, 0x03) // difficulty 1, seal length 85
80+
expectedBytes = append(expectedBytes, seal...)
81+
assert.Equal(t, expectedBytes, bytesBuffer.Bytes())
82+
}
83+
84+
// Header4
85+
{
86+
seal := randomSeal(65)
87+
header := types.NewHeader(4, 2, append(vanity1[:], seal...))
88+
mhw.write(header)
89+
90+
expectedBytes = append(expectedBytes, 0x00) // index 0
91+
expectedBytes = append(expectedBytes, 0x00) // difficulty 2, seal length 65
92+
expectedBytes = append(expectedBytes, seal...)
93+
assert.Equal(t, expectedBytes, bytesBuffer.Bytes())
94+
}
95+
}
96+
97+
// randomSeal returns length cryptographically random bytes for use as a seal
// in tests. It panics if the system random source fails, so a test can never
// silently run against an all-zero seal.
func randomSeal(length int) []byte {
	buf := make([]byte, length)
	if _, err := rand.Read(buf); err != nil {
		panic("randomSeal: reading random bytes: " + err.Error())
	}
	return buf
}

rollup/missing_header_fields/export-headers-toolkit/go.mod

+3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go 1.22
55
require (
66
github.com/scroll-tech/go-ethereum v1.10.14-0.20240624092647-7da0bd5480e9
77
github.com/spf13/cobra v1.8.1
8+
github.com/stretchr/testify v1.9.0
89
)
910

1011
require (
@@ -42,6 +43,7 @@ require (
4243
github.com/mmcloughlin/addchain v0.4.0 // indirect
4344
github.com/olekukonko/tablewriter v0.0.5 // indirect
4445
github.com/pkg/errors v0.9.1 // indirect
46+
github.com/pmezard/go-difflib v1.0.0 // indirect
4547
github.com/prometheus/tsdb v0.7.1 // indirect
4648
github.com/rivo/uniseg v0.4.4 // indirect
4749
github.com/rjeczalik/notify v0.9.1 // indirect
@@ -64,5 +66,6 @@ require (
6466
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba // indirect
6567
gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce // indirect
6668
gopkg.in/urfave/cli.v1 v1.20.0 // indirect
69+
gopkg.in/yaml.v3 v3.0.1 // indirect
6770
rsc.io/tmplfunc v0.0.3 // indirect
6871
)

rollup/missing_header_fields/export-headers-toolkit/go.sum

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlT
100100
github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4=
101101
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515 h1:T+h1c/A9Gawja4Y9mFVWj2vyii2bbUNDw3kt9VxK2EY=
102102
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
103+
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
104+
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
105+
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
106+
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
103107
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
104108
github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8=
105109
github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
@@ -147,6 +151,8 @@ github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
147151
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
148152
github.com/rjeczalik/notify v0.9.1 h1:CLCKso/QK1snAlnhNR/CNvNiFU2saUtjV0bx3EwNeCE=
149153
github.com/rjeczalik/notify v0.9.1/go.mod h1:rKwnCoCGeuQnwBtTSPL9Dad03Vh2n40ePRrjvIXnJho=
154+
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
155+
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
150156
github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik=
151157
github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
152158
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@@ -235,6 +241,8 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
235241
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
236242
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
237243
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
244+
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
245+
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
238246
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
239247
gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce h1:+JknDZhAj8YMt7GC73Ei8pv4MzjDUNPHgQWJdtMAaDU=
240248
gopkg.in/natefinch/npipe.v2 v2.0.0-20160621034901-c1b8fa8bdcce/go.mod h1:5AcXVHNjg+BDxry382+8OKon8SEWiKktQR07RKPsv1c=

rollup/missing_header_fields/export-headers-toolkit/types/header.go

+16-3
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,22 @@ import (
66
)
77

88
const HeaderSizeSerialized = 2
9+
const VanitySize = 32
910

1011
// Header holds the header fields handled by the export-headers toolkit.
type Header struct {
	// Number is the block number.
	Number uint64
	// Difficulty is the block difficulty.
	Difficulty uint64
	// ExtraData is the raw extra data; its first 32 bytes are the vanity and
	// the remainder is the seal (see Vanity and SealLen).
	ExtraData []byte
}
1516

17+
func NewHeader(number, difficulty uint64, extraData []byte) *Header {
18+
return &Header{
19+
Number: number,
20+
Difficulty: difficulty,
21+
ExtraData: extraData,
22+
}
23+
}
24+
1625
// String renders the header as a single newline-terminated line of the form
// "<number>,<difficulty>,0x<extra data in hex>".
func (h *Header) String() string {
	const layout = "%d,%d,0x%x\n"
	return fmt.Sprintf(layout, h.Number, h.Difficulty, h.ExtraData)
}
@@ -29,12 +38,16 @@ func (h *Header) Bytes() ([]byte, error) {
2938
return buf, nil
3039
}
3140

32-
func (h *Header) Vanity() [32]byte {
33-
return [32]byte(h.ExtraData[:32])
41+
func (h *Header) Vanity() [VanitySize]byte {
42+
return [VanitySize]byte(h.ExtraData[:VanitySize])
43+
}
44+
45+
func (h *Header) Seal() []byte {
46+
return h.ExtraData[VanitySize:]
3447
}
3548

3649
func (h *Header) SealLen() int {
37-
return len(h.ExtraData[32:])
50+
return len(h.Seal())
3851
}
3952

4053
// FromBytes reads the header from the byte representation excluding the initial 2 bytes for the size.

0 commit comments

Comments
 (0)