forked from venkatra/atlas-v2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
news-ingestion-dataset.json
160 lines (157 loc) · 4.87 KB
/
news-ingestion-dataset.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
{
"entities": [
{
"typeName": "DataFile",
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_from_reuters",
"uri": "news_from_reuters",
"name": "news_from_reuters",
"file_name_pattern": "news_from_reuters_DDMMYYYY",
"directory": "/landingzone/reuters/news",
"frequency":"daily",
"user":"reuters_webscrapper",
"group":"newssource",
"format":"csv",
"server" : {"guid": -100,"typeName": "server"},
"schema":[
{ "col" : "id" ,"data_type" : "string" ,"required" : true },
{ "col" : "scrap_time" ,"data_type" : "timestamp" ,"required" : true },
{ "col" : "url" ,"data_type" : "string" ,"required" : true },
{ "col" : "headline" ,"data_type" : "string" ,"required" : true },
{ "col" : "content" ,"data_type" : "string" ,"required" : false }
]
},
"classifications": [
{ "typeName": "news" }
]
},
{ "typeName": "kafka_value_message_schema","guid":-500,
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_topic.id",
"uri": "news_topic.id",
"name": "id",
"col": "id",
"data_type": "string",
"required": true
}
},
{ "typeName": "kafka_value_message_schema","guid":-501,
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_topic.scrap_time",
"uri": "news_topic.scrap_time",
"name": "scrap_time",
"col": "scrap_time",
"data_type": "timestamp",
"required": true
}
},
{ "typeName": "kafka_value_message_schema","guid":-502,
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_topic.url",
"uri": "news_topic.url",
"name": "url",
"col": "url",
"data_type": "string",
"required": true
}
},
{ "typeName": "kafka_value_message_schema","guid":-503,
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_topic.headline",
"uri": "news_topic.headline",
"name": "headline",
"col": "headline",
"data_type": "string",
"required": true
}
},
{ "typeName": "kafka_value_message_schema","guid":-504,
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_topic.content",
"uri": "news_topic.content",
"name": "content",
"col": "content",
"data_type": "string",
"required": false
}
},
{
"typeName": "kafka_topic_and_schema",
"createdBy": "ingestors_news",
"attributes": {
"qualifiedName": "news_topic",
"uri": "news_topic",
"name": "news_topic",
"topic":"news_topic",
"description": "Topic into which news from different websites is injected",
"owner": "ingestors_news",
"key_schema":[
{ "typeName": "kafka_value_message_schema","guid":-500 }
],
"value_schema":[
{ "typeName": "kafka_value_message_schema","guid":-500 },
{ "typeName": "kafka_value_message_schema","guid":-501 },
{ "typeName": "kafka_value_message_schema","guid":-502 },
{ "typeName": "kafka_value_message_schema","guid":-503 },
{ "typeName": "kafka_value_message_schema","guid":-504 }
]
},
"classifications": [
{ "typeName": "news" }
]
},
{
"typeName": "hbase_table",
"createdBy": "ingestors",
"guid":-110 ,
"attributes": {
"qualifiedName": "news:news_from_reuters",
"uri": "news:news_from_reuters",
"name": "news_from_reuters",
"description": "Table which contains news scraped from Reuters",
"owner": "ingestors"
}
},
{
"typeName": "hbase_column_family",
"createdBy": "ingestors",
"guid":-111,
"attributes": {
"qualifiedName": "news:news_from_reuters.z",
"uri": "news:news_from_reuters.z",
"name": "z",
"table":{ "qualifiedName":"news:news_from_reuters" ,"guid":-110 ,"typeName": "hbase_table"}
}
}
,{
"typeName": "hbase_column",
"createdBy": "ingestors",
"attributes": {
"qualifiedName": "news:news_from_reuters.z.message",
"uri": "news:news_from_reuters.z.message",
"name": "message",
"type": "string",
"column_family":{ "qualifiedName":"news:news_from_reuters.z" ,"guid":-111 ,"typeName": "hbase_column_family"}
}
}
],
"referredEntities": {
"-100": {
"guid": "-100",
"typeName": "server",
"attributes": {
"qualifiedName": "landing_zone_server_1@dev",
"uri": "landing_zone_server_1@dev",
"name": "landing_zone_server_1",
"dns_name": "pluto",
"ip_address": "10.71.68.93"
}
}
}
}