-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathglm_parser.h
162 lines (125 loc) · 3.52 KB
/
glm_parser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#ifndef _GLM_PARSER_H
#define _GLM_PARSER_H
#include <vector>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <string>
#include <unordered_map>
using namespace std;
// Report a fatal error on stderr and terminate the process with exit(-1).
//   s    - printf-style format string
//   args - values consumed by `s` (may be omitted entirely)
// Output: a header line with the source file and line of the call site,
// then the formatted message, then a newline. The newline is written to
// stderr *before* exit() so the message is always properly terminated.
// `##args` drops the preceding comma when no arguments are supplied.
#define ERROR(s, args...) { \
    fprintf(stderr, "ERROR @ file %s line %d\n", __FILE__, __LINE__); \
    fprintf(stderr, s, ##args); \
    fputc('\n', stderr); \
    exit(-1); }
// Print a diagnostic message on stderr; execution continues afterwards.
//   s    - printf-style format string
//   args - values consumed by `s` (may be omitted entirely)
// Output: a header line with the source file and line of the call site,
// then the formatted message, then a newline. Everything goes to stderr so
// the message stays intact when stdout and stderr are redirected separately.
// `##args` drops the preceding comma when no arguments are supplied.
#define DEBUG(s, args...) { \
    fprintf(stderr, "DEBUG @ file %s line %d\n", __FILE__, __LINE__); \
    fprintf(stderr, s, ##args); \
    fputc('\n', stderr); }
// Maximum line length from file
#define LINE_BUFFER_MAX 512
// NOTE(review): presumably buffer sizes for a section path, a single word
// and a single POS tag respectively -- confirm against the call sites.
#define SECTION_PATH_MAX 1024
#define WORD_MAX 128
#define POS_MAX 16
// State machine used to parse input file
#define STATE_FINISHED 0
#define STATE_PROCESSING 1
// Used by hash function to compute hash value on
// machines with different word size.
// Each *_HIGH_FIVE_BITS mask has only the top five bits of a word of that
// width set; each *_LOW_BIT_NUM is that width minus five (32 - 5 = 27 and
// 64 - 5 = 59).
#define _32BIT_HIGH_FIVE_BITS 0xF8000000
#define _32BIT_LOW_BIT_NUM 27
#define _64BIT_HIGH_FIVE_BITS 0xF800000000000000L
#define _64BIT_LOW_BIT_NUM 59
// Initial eisner matrix size
#define INIT_SENTENCE_LEN 100
// NOTE(review): presumably an upper bound on the number of edges kept per
// sentence -- confirm where it is used.
#define MAX_EDGE_LIST_SIZE 200
// Plain record holding a score and an index.
// NOTE(review): presumably one cell of the Eisner parsing chart -- confirm
// against the code that allocates the matrix.
struct EisnerNode
{
// Score stored for this cell.
float score;
// NOTE(review): presumably the split position that produced `score` -- confirm.
int mid_index;
};
// Four-integer record: s, t, orientation and shape.
// NOTE(review): presumably a pending work item used when recovering edges
// from the Eisner chart -- confirm against the code that consumes it.
struct EdgeRecoveryNode
{
    int s;
    int t;
    int orientation;
    int shape;

    // Copies each argument into the member of the matching name.
    EdgeRecoveryNode(int ps, int pt, int porientation, int pshape)
        : s(ps), t(pt), orientation(porientation), shape(pshape)
    {
    }
};
// Pointer to a single EisnerNode.
using P_EisnerNode = EisnerNode *;
// Pointer to a pointer to EisnerNode.
using PP_EisnerNode = EisnerNode **;
// How many bits do we leave for type, dir and dist information
// NOTE(review): the comment above talks about a bit count, but the constant
// below is named as a multiplier -- the comment looks stale; confirm how
// HASH_MULTIPLIER is used by the hash function and reword accordingly.
#define HASH_MULTIPLIER 2897
// A pair of integer indices: the head and the dependent of one edge.
// NOTE(review): presumably both are token positions within a Sentence --
// confirm against the code that fills Sentence::gold_edge_list.
struct Edge
{
    int head_index;
    int dep_index;

    // Builds an edge from head index `hi` to dependent index `di`.
    Edge(int hi, int di)
        : head_index(hi), dep_index(di)
    {
    }

    // Default edge. Both indices are set to 0 so a default-constructed Edge
    // never carries indeterminate values; 0 is also what static-storage
    // objects already held, so existing well-defined behaviour is unchanged.
    Edge()
        : head_index(0), dep_index(0)
    {
    }
};
// Data stored for one sentence: string lists, a flag list and the edges.
struct Sentence
{
    // NOTE(review): presumably word_list[i] and pos_list[i] describe the
    // same token -- confirm against the loader.
    std::vector<std::string> word_list;
    std::vector<std::string> pos_list;
    // NOTE(review): presumably a shortened form of each word plus a flag
    // telling whether that form applies -- confirm against the loader.
    std::vector<std::string> five_gram_word_list;
    std::vector<bool> five_gram_flag;
    // Edges attached to this sentence.
    std::vector<Edge> gold_edge_list;
};
// One input file together with the sentences stored for it.
struct SectionFile
{
    // File name only, without any directory component.
    std::string filename;
    // Sentences belonging to this file.
    std::vector<Sentence> sentence_list;
};
// A numbered section and the files that belong to it.
struct Section
{
    // Numeric identifier of the section.
    int section_id;
    // Files contained in this section.
    std::vector<SectionFile> file_list;
};
// Cursor over the section / file / sentence hierarchy plus bookkeeping
// counters and timestamps.
struct Context
{
    int current_section;    // Index into section_list
    int current_file;       // Index into Section.file_list
    int current_sentence;   // Index into SectionFile.sentence_list
    int total_sentence;     // Total number of sentences processed
    float start_time;       // Used for time accounting
    float end_time;         // Same as above

    // Starts at section 0, file 0, with the sentence index at -1 and all
    // counters and timestamps zeroed.
    // NOTE(review): -1 presumably means "before the first sentence" so the
    // first advance lands on index 0 -- confirm against the iterator code.
    Context()
        : current_section(0),
          current_file(0),
          current_sentence(-1),
          total_sentence(0),
          start_time(0.0f),
          end_time(0.0f)
    {
    }
};
// Wrapper around a single pointer to a string.
// The pointer is not initialised by this struct; set it before reading it.
// NOTE(review): ownership of the pointed-to string is not visible here --
// confirm whether Feature owns it or merely borrows it.
struct Feature
{
    std::string *word;
};
///////////////////// Function Declarations
// Used by parser to register callback
// Only declared here; the definition is not part of this header.
// NOTE(review): presumably returns the first-order feature score of the edge
// from token `head_index` to token `dep_index` within `*sent` -- confirm
// against the definition.
float get_first_order_feature_score(Sentence *sent, int head_index, int dep_index);
// Weight table keyed by feature hash value. Only declared here; the
// definition lives in another translation unit.
extern std::unordered_map<unsigned long, float> weight_vector;

// Returns the weight stored for hash value `h`, or 0.0 when `h` has no entry.
// To save space, an unknown hash simply reports 0.0: find() never inserts,
// so lookups do not grow the table. A single find() also replaces the
// previous count() + at() pair, which hashed the key twice on every hit.
inline float get_weight(unsigned long h)
{
    const auto entry = weight_vector.find(h);
    return entry == weight_vector.end() ? 0.0f : entry->second;
}
// Combines `type` and `dir_dist` into one value: `type` is shifted left by
// four bits and OR-ed with `dir_dist`. No masking is applied, so any bits of
// `dir_dist` above the low four overlap the shifted `type`.
inline unsigned long pack_type_dir_dist(unsigned long type, unsigned long dir_dist)
{
    const unsigned int type_shift = 4;
    const unsigned long shifted_type = type << type_shift;
    return shifted_type | dir_dist;
}
// Adds two weights to `score` for one feature:
//   1. the weight of hash_feature(type, num, feature_buffer + offset);
//   2. the weight of the same feature hashed with `type` replaced by
//      pack_type_dir_dist(type, dir_dist).
// Both hashes are looked up through get_weight(); the second one used to be
// added to `score` as a raw hash value, which is fixed here.
// On exit `h` holds the second hash. Macro arguments are parenthesised so
// that expression arguments expand safely; `type`, `num` and `offset` are
// each evaluated twice.
// Assumes: feature_buffer, score, h and dir_dist have already been defined
#define add_feature(type, num, offset) { \
    h = hash_feature((type), (num), feature_buffer + (offset)); \
    score += get_weight(h); \
    h = hash_feature(pack_type_dir_dist((type), dir_dist), (num), feature_buffer + (offset)); \
    score += get_weight(h); }
#endif