-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdiscord_scraper.js
263 lines (225 loc) · 9.75 KB
/
discord_scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
javascript:(function() {
/* When run on a Discord forum page, this first captures the latest 50 messages already rendered. Then, as the user scrolls up (backwards in time), each successive 50-message batch is prepended. Finally, on Download or Copy, the messages are grouped into reply threads to optimize the export. */
/* Shared store of captured messages, keyed by message id */
const messages = new Map();

/* Floating control panel pinned to the top-right corner of the page */
const panel = document.createElement('div');
const panelCss = [
  '',
  'position: fixed;',
  'top: 20px;',
  'right: 20px;',
  'background: #2f3136;',
  'color: white;',
  'padding: 10px;',
  'border-radius: 8px;',
  'z-index: 9999;',
  'font-family: Arial, sans-serif;',
  'display: flex;',
  'gap: 10px;',
  'align-items: center;',
  'box-shadow: 0 2px 10px rgba(0,0,0,0.2);',
  '',
].join('\n');
panel.style.cssText = panelCss;

/* Label showing how many messages have been captured so far */
const counter = document.createElement('div');
counter.style.marginRight = '10px';
counter.textContent = 'Messages: 0';
/* Extract initial messages from DOM */
/**
 * Scrapes the messages currently rendered in the chat message list.
 *
 * For each list item it reads the id (last hyphen-separated segment of the
 * element id), author, timestamp and text content, plus reply context when a
 * replied-message preview is present.
 *
 * @returns {Array<Object>} One object per message with `id`, `content`,
 *   `author`, `timestamp` and, for replies, `referenced_id`,
 *   `referenced_author` and `referenced_content`. Empty when the message
 *   list is not found.
 */
function extractInitialMessages() {
  const messageList = document.querySelector('ol[data-list-id="chat-messages"]');
  if (!messageList) return [];

  /* Concatenates the text of every <span> below an element */
  const spanText = element =>
    Array.from(element.querySelectorAll('span'))
      .map(span => span.textContent)
      .join('');
  /* Drops only a leading mention marker so names that contain '@' stay intact */
  const stripMention = name => name.replace(/^@/, '');

  let lastKnownAuthor = null;
  const initialMessages = [];
  messageList.querySelectorAll('li[class*="messageListItem"]').forEach(li => {
    const messageDiv = li.querySelector('div[class*="message_"]');
    if (!messageDiv) return;
    const messageId = li.id.split('-').pop();

    /* Extract author; items without a header reuse the last author seen */
    const headerText = messageDiv.querySelector('h3[class*="header_"]');
    const authorElement = headerText ? headerText.querySelector('span[class*="username_"]') : null;
    const author = authorElement ?
      stripMention(authorElement.textContent) :
      lastKnownAuthor || 'Unknown';
    if (authorElement) lastKnownAuthor = author;

    /* Extract timestamp; falls back to the current time when no <time> exists */
    const timeElement = messageDiv.querySelector('time');
    const timestamp = timeElement ? timeElement.getAttribute('datetime') : new Date().toISOString();

    /* Extract content */
    const contentDiv = messageDiv.querySelector('div[id^="message-content-"]');
    const content = contentDiv ? spanText(contentDiv) : '';

    const messageObj = {
      id: messageId,
      content: content,
      author: author,
      timestamp: timestamp
    };

    /* Handle reply context */
    const replyDiv = messageDiv.querySelector('div[class*="repliedMessage_"]');
    if (replyDiv) {
      const replyContent = replyDiv.querySelector('div[class*="repliedTextContent_"]');
      const replyAuthor = replyDiv.querySelector('span[class*="username_"]');
      if (replyContent && replyAuthor) {
        const replyContentText = spanText(replyContent);
        /* Only add reference if content differs */
        if (replyContentText !== content) {
          messageObj.referenced_id = replyContent.id.split('-').pop();
          messageObj.referenced_author = stripMention(replyAuthor.textContent);
          messageObj.referenced_content = replyContentText;
        }
      }
    }
    initialMessages.push(messageObj);
  });
  return initialMessages;
}
/**
 * Groups the captured messages into reply threads.
 *
 * A message without `referenced_id` starts a thread. Every reply is attached
 * to the thread found by following `referenced_id` links upwards through the
 * shared `messages` map. Replies whose chain does not end at a captured root
 * (missing parent, self-reference or reference cycle) are left out.
 *
 * @returns {Array<{root: Object, replies: Array<Object>}>} Threads that have
 *   at least one reply, ordered by ascending root timestamp. `root` and each
 *   reply carry `author`, `content` and `timestamp`.
 */
function reconstructThreads() {
  const messagesArray = Array.from(messages.values());
  const threads = new Map();

  /* First pass: identify root messages */
  messagesArray.forEach(msg => {
    if (!msg.referenced_id) {
      threads.set(msg.id, {
        root: {
          author: msg.author,
          content: msg.content,
          timestamp: msg.timestamp
        },
        replies: []
      });
    }
  });

  /* Second pass: attach replies to their threads */
  messagesArray.forEach(msg => {
    if (!msg.referenced_id) return;
    let threadRoot = msg.referenced_id;
    /* Ids already seen on this walk; a repeat means the chain loops, so stop instead of spinning forever */
    const visited = new Set([msg.id]);
    /* Walk up the reference chain to find the root */
    let currentMsg = messages.get(threadRoot);
    while (currentMsg && currentMsg.referenced_id && !visited.has(threadRoot)) {
      visited.add(threadRoot);
      threadRoot = currentMsg.referenced_id;
      currentMsg = messages.get(threadRoot);
    }
    /* If we found a valid thread, add the reply */
    if (threads.has(threadRoot)) {
      threads.get(threadRoot).replies.push({
        author: msg.author,
        content: msg.content,
        timestamp: msg.timestamp
      });
    }
  });

  /* Convert to array and clean up empty threads */
  return Array.from(threads.values())
    .filter(thread => thread.replies.length > 0)
    .sort((a, b) => new Date(a.root.timestamp) - new Date(b.root.timestamp));
}
/**
 * Merges a batch of messages into the shared `messages` map and updates the
 * counter label.
 *
 * The map is rebuilt in ascending timestamp order after every merge, so the
 * stored order stays chronological whether a batch is older or newer than
 * what was already captured. A message whose id is already stored keeps its
 * existing entry.
 *
 * @param {Array<Object>} data - Raw API message objects, or objects already
 *   in the internal format. Non-array input is ignored.
 * @param {boolean} [isFromAPI=true] - When true, each item is converted from
 *   the API shape (`author.global_name` / `author.username`,
 *   `referenced_message`) to the internal format.
 * @returns {void}
 */
function processMessages(data, isFromAPI = true) {
  if (!Array.isArray(data)) return;

  /* Display name of an API user object; tolerates a missing or null user */
  const displayName = user => (user && (user.global_name || user.username)) || 'Unknown';
  /* Sort key for a timestamp; unparseable values sort first instead of yielding NaN */
  const toMillis = timestamp => {
    const ms = new Date(timestamp).getTime();
    return Number.isNaN(ms) ? 0 : ms;
  };

  /* Convert API messages to our format if needed (filter/map also copy, so the caller's array is untouched) */
  const batch = isFromAPI ?
    data
      .filter(msg => msg && msg.id != null)
      .map(msg => ({
        id: msg.id,
        content: msg.content,
        author: displayName(msg.author),
        timestamp: msg.timestamp,
        ...(msg.referenced_message && {
          referenced_id: msg.referenced_message.id,
          referenced_author: displayName(msg.referenced_message.author),
          referenced_content: msg.referenced_message.content
        })
      })) :
    data.filter(Boolean);

  /* Existing entries go in first so an already-captured message is kept as is */
  const merged = new Map(messages);
  batch.forEach(msg => {
    if (!merged.has(msg.id)) merged.set(msg.id, msg);
  });

  /* Rebuild the store in chronological order */
  const ordered = Array.from(merged.values())
    .sort((a, b) => toMillis(a.timestamp) - toMillis(b.timestamp));
  messages.clear();
  ordered.forEach(msg => messages.set(msg.id, msg));
  counter.textContent = `Messages: ${messages.size}`;
}
/* Create buttons */
/* Base style shared by every button; each button appends its own background colour */
const buttonStyle = `
padding: 5px 10px;
border: none;
color: white;
border-radius: 3px;
cursor: pointer;
`;

/* Download: saves the reconstructed threads as a JSON file */
const downloadBtn = document.createElement('button');
downloadBtn.textContent = 'Download';
downloadBtn.style.cssText = buttonStyle + 'background: #5865F2;';
downloadBtn.onclick = () => {
  const data = reconstructThreads();
  const blob = new Blob([JSON.stringify(data, null, 2)], {type: 'application/json'});
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'discord-threads.json';
  a.click();
  /* Revoke after the click has been handled; revoking synchronously can cancel the download */
  setTimeout(() => URL.revokeObjectURL(url), 1000);
};

/* Copy: puts the same JSON on the clipboard and briefly confirms on the button */
const copyBtn = document.createElement('button');
copyBtn.textContent = 'Copy';
copyBtn.style.cssText = buttonStyle + 'background: #5865F2;';
copyBtn.onclick = () => {
  const data = reconstructThreads();
  navigator.clipboard.writeText(JSON.stringify(data, null, 2))
    .then(() => {
      copyBtn.textContent = 'Copied!';
      setTimeout(() => copyBtn.textContent = 'Copy', 1000);
    })
    /* writeText rejects when clipboard access is denied; report it instead of leaving an unhandled rejection */
    .catch(e => console.error('Error copying to clipboard:', e));
};

/* Close: removes the panel from the page */
const closeBtn = document.createElement('button');
/* Multiplication-X glyph, written as an escape so it survives any file re-encoding */
closeBtn.textContent = '\u2715';
closeBtn.style.cssText = buttonStyle + 'background: #ed4245;';
closeBtn.onclick = () => panel.remove();

/* Assemble UI */
panel.appendChild(counter);
panel.appendChild(downloadBtn);
panel.appendChild(copyBtn);
panel.appendChild(closeBtn);
/* Intercept XHR requests */
/* Hook open() on the prototype instead of replacing the constructor: the page keeps the native XMLHttpRequest (static constants such as DONE, instanceof checks), and requests created before this script ran are covered too */
const originalXHR = window.XMLHttpRequest;
const originalOpen = originalXHR.prototype.open;
/* Requests that already carry our load listener, so a reused request is not hooked twice */
const hookedRequests = new WeakSet();
originalXHR.prototype.open = function(...args) {
  if (!hookedRequests.has(this)) {
    hookedRequests.add(this);
    this.addEventListener('load', function() {
      if (!this.responseURL.includes('/messages?')) return;
      try {
        /* responseText is only readable for '' and 'text' response types; otherwise use the response value itself */
        const isText = this.responseType === '' || this.responseType === 'text';
        const data = isText ? JSON.parse(this.responseText) : this.response;
        processMessages(data, true);
      } catch (e) {
        console.error('Error processing response:', e);
      }
    });
  }
  return originalOpen.apply(this, args);
};
/* Intercept fetch requests */
/* Wraps window.fetch: the original promise is returned untouched, while a clone of any response to a '/messages?' request is parsed and handed to processMessages */
const originalFetch = window.fetch;
window.fetch = function(url, options) {
  const promise = originalFetch(url, options);

  /* The first argument may be a string, a URL object or a Request; a Request stringifies to '[object Request]', so read its .url instead */
  let requestUrl = '';
  if (typeof url === 'string') {
    requestUrl = url;
  } else if (url && typeof url.url === 'string') {
    requestUrl = url.url;
  } else if (url != null) {
    requestUrl = String(url);
  }

  if (requestUrl.includes('/messages?')) {
    promise.then(response => response.clone().json())
      .then(data => processMessages(data, true))
      .catch(e => console.error('Error processing fetch:', e));
  }
  return promise;
};
/* Seed the store with whatever messages are already rendered on the page */
const seedBatch = extractInitialMessages();
if (seedBatch.length) {
  processMessages(seedBatch, false);
}

/* Mount the control panel and announce that capture is running */
document.body.appendChild(panel);
console.log('Discord scraper active. Scroll to capture messages.');
})();