-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathdata_handling.py
305 lines (222 loc) · 9.99 KB
/
data_handling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
import time
import os
import re
import sqlite3
import pprint
def database_query(args, database_name, previous_results):
    """Print every stored result whose title or URL matches the query, then exit.

    Args:
        args: Parsed options; ``args.databasequery`` is used as a regular
            expression and matched case-insensitively.
        database_name: Database file name, looked up relative to the
            current working directory.
        previous_results: Iterable of rows indexed as
            ``(query, title, url)``.

    Raises:
        SystemExit: Always. Code 0 after the matches and their count have
            been printed, code 1 when the database file does not exist.
    """
    # Check if Database File Exists
    if not os.path.exists('./' + database_name):
        print("No Database Detected..")
        raise SystemExit(1)

    # Compile once rather than resolving the pattern twice for every row
    pattern = re.compile(args.databasequery, re.IGNORECASE)

    # Search for Matching Results
    # Only Match Title or URL
    results_found = [
        "{} | {} | {}".format(entry[0], entry[1], entry[2])
        for entry in previous_results
        # A NULL column is read back as None; treat it as empty text so the
        # search does not raise TypeError.
        if pattern.search(entry[1] or '') or pattern.search(entry[2] or '')
    ]

    # Print Matching Results
    for result in results_found:
        print(result)
    print('\n' + str(len(results_found)) + ' Results Found')
    raise SystemExit(0)
# Previous Database Function
def previous_database(args, database_name, database_file, detect_database, database_results, previous_results):
    """Load rows from an existing database, or merge another database into it.

    Args:
        args: Parsed options. ``args.mergedatabase`` (file name of a database
            to merge) and ``args.databasequery`` are read here.
        database_name: Database file name, looked up relative to the
            current working directory.
        database_file: Passed through unchanged to ``store_results``.
        detect_database: Flag returned as-is when no database file exists.
        database_results: Not read; only rebound to the value returned by
            ``store_results`` in merge mode.
        previous_results: Returned as-is when no database file exists.

    Returns:
        tuple: ``(previous_results, detect_database)``. When the database
        file exists, ``previous_results`` holds the fetched rows
        (``query, title, url`` in query mode, otherwise ``title, url``) and
        ``detect_database`` is ``True``.

    Raises:
        SystemExit: In merge mode only. Code 1 when the database to merge
            does not exist, code 0 once the merge has been stored.
    """
    # Check if Database File Exists
    database_exists = os.path.exists('./' + database_name)

    # If Merge Database: the steps are the same whether or not the target
    # database already exists, so they are written once.
    if args.mergedatabase:
        if not os.path.exists('./' + args.mergedatabase):
            # The wording of this message has always depended on whether the
            # target database exists; both variants are kept unchanged.
            if database_exists:
                print('No Database ' + str(args.mergedatabase) + ' Detected..')
            else:
                print('No Database "' + str(args.mergedatabase) + '" Detected..')
            raise SystemExit(1)
        # Query Data from Database
        previous_results = _fetch_search_results(
            args.mergedatabase,
            'SELECT query, title, url FROM search_results')
        # Store Search Results in SQL
        database_results = store_results(
            args, database_name, database_file, previous_results)
        print("Database Merged")
        print('TOTAL NUMBER OF RESULTS: ' + str(len(database_results)))
        raise SystemExit(0)

    # No previous database: hand the inputs back untouched
    if not database_exists:
        return previous_results, detect_database

    # Query mode also needs the query column; normal mode only title and url
    if args.databasequery:
        statement = 'SELECT query, title, url FROM search_results'
    else:
        statement = 'SELECT title, url FROM search_results'
    previous_results = _fetch_search_results(database_name, statement)

    # If Previous Database Detected Set to True
    detect_database = True
    return previous_results, detect_database


def _fetch_search_results(database_path, statement):
    """Run ``statement`` against ``database_path`` and return all rows."""
    connection = sqlite3.connect(database_path)
    try:
        return connection.cursor().execute(statement).fetchall()
    finally:
        # Close the connection even when the query raises
        connection.close()
# Prepare Data Function
def prepare_data(results, concurrent_connections, previous_server):
    """Reduce raw search responses to a flat list of title/url dictionaries.

    Args:
        results: One of three shapes:
            * a list of objects whose ``.result()`` returns a response,
            * a tuple ``(such_a_list, previous_server)``,
            * a single response object.
            A response is anything whose ``.json()`` returns a mapping with
            a ``'results'`` list of mappings holding ``'title'`` and
            ``'url'``.
        concurrent_connections: Number of leading entries of the list to
            unpack; not used for a single response.
        previous_server: Returned unchanged unless ``results`` is a tuple,
            in which case the tuple's second element replaces it.

    Returns:
        tuple: ``(results_data, list_of_raw_results, previous_server)``.
        ``results_data`` is the flat list of ``{'title', 'url'}``
        dictionaries; ``list_of_raw_results`` holds the decoded JSON of each
        concurrent response and stays empty for a single response.
    """
    # Initialize the Results Data List
    list_of_raw_results = []
    results_data = []

    if isinstance(results, (list, tuple)):
        if isinstance(results, tuple):
            # Unpack Nested Results Data
            pending, previous_server = results
        else:
            pending = results
        # Results are from Concurrent Requests: Unpack Them
        for number in range(concurrent_connections):
            # Decode each body once and reuse it for both outputs
            payload = pending[number].result().json()
            list_of_raw_results.append(payload)
            results_data.extend(
                {'title': item['title'], 'url': item['url']}
                for item in payload['results'])
    else:
        # Single response: Create a List of Dictionaries for Each Result
        results_data.extend(
            {'title': item['title'], 'url': item['url']}
            for item in results.json()['results'])

    # Return Results Data
    return results_data, list_of_raw_results, previous_server
# Store Results in SQL Function
def store_results(args, database_name, database_file, results_data):
    """Insert results into the ``search_results`` table and deduplicate it.

    Args:
        args: Parsed options. When ``args.mergedatabase`` is truthy each
            entry of ``results_data`` is indexed as ``(query, title, url)``;
            otherwise ``args.query`` is stored with each entry's ``'title'``
            and ``'url'``.
        database_name: Path handed to ``sqlite3.connect``; the file is
            created by SQLite when it does not exist yet.
        database_file: Kept for interface compatibility; not used here.
        results_data: Iterable of rows (merge mode) or of mappings with
            ``'title'`` and ``'url'`` keys.

    Returns:
        list: Every ``(title, url)`` row stored after deduplication.
    """
    # Build the rows first so malformed input fails before the database is
    # touched.
    if args.mergedatabase:
        rows = [(entry[0], entry[1], entry[2]) for entry in results_data]
    else:
        rows = [(args.query, entry['title'], entry['url'])
                for entry in results_data]

    # Create Database Connection and Cursor
    # (connecting creates the file, so no separate os.mknod step is needed;
    # os.mknod is also unavailable on some platforms)
    connection = sqlite3.connect(database_name)
    try:
        cursor = connection.cursor()
        # If Database Table Does Not Exist Create it
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS search_results "
            "(query TEXT, title TEXT, url TEXT)")
        # Insert Data into Database
        cursor.executemany(
            "INSERT INTO search_results VALUES (?, ?, ?);", rows)
        # Deduplicate SQL Entries
        # A temp_table left behind by an interrupted run would make the
        # CREATE below fail, so clear it first.
        cursor.execute("DROP TABLE IF EXISTS temp_table;")
        cursor.execute("CREATE TABLE temp_table as SELECT DISTINCT * FROM search_results;")
        cursor.execute("DELETE FROM search_results;")
        cursor.execute("INSERT INTO search_results SELECT * FROM temp_table ORDER BY query")
        cursor.execute("DROP TABLE temp_table;")
        # Commit Database Entries
        connection.commit()
        # Query Data from Database and Return Database Entries
        return cursor.execute(
            'SELECT title, url FROM search_results').fetchall()
    finally:
        # Close Database Connection, also when a statement raises
        connection.close()
# Output Function
def output_results(args, concurrent_connections,
                   results, list_of_raw_results,
                   database_name, database_results,
                   exceeded_database, previous_results,
                   total_current_results):
    """Print search output and back up the database once it grows too large.

    Args:
        args: Parsed options; ``args.veryverbose`` and ``args.limitdatabase``
            are read here.
        concurrent_connections: Number of concurrent requests; a falsy
            value selects single page mode.
        results: In single page very verbose mode, ``results[0].json()`` is
            pretty-printed.
        list_of_raw_results: Pretty-printed in concurrent very verbose mode.
        database_name: Database file name relative to the current working
            directory; renamed when the limit is reached.
        database_results: Rows indexed as ``(title, url)``.
        exceeded_database: Flag returned unchanged unless a backup happens.
        previous_results: Compared with ``total_current_results`` and
            returned unchanged.
        total_current_results: List of already printed ``"title | url"``
            strings; newly printed results are appended to it in place.

    Returns:
        tuple: ``(exceeded_database, previous_results,
        total_current_results)``.
    """
    # Create Formatted List of Current Results
    current_results = ["{} | {}".format(result[0], result[1])
                       for result in database_results]

    # Test the verbosity flag first so the numeric comparison below is never
    # reached with concurrent_connections=None when very verbose mode is off.
    very_verbose = args.veryverbose is True

    # Pretty Print Raw JSON Responses in Very Verbose Mode for Single Page Mode
    if very_verbose and not concurrent_connections:
        # Print Raw Search Results to STDOUT
        pprint.pprint(results[0].json())
    # Pretty Print Raw JSON Responses in Very Verbose Mode for Concurrent Page Mode
    elif very_verbose and concurrent_connections > 0:
        pprint.pprint(list_of_raw_results)
    # Print New Results
    elif args.veryverbose is False or previous_results != total_current_results or previous_results == []:
        # Set lookup keeps the "already printed" check cheap on long runs
        already_printed = set(total_current_results)
        for result in current_results:
            if result not in already_printed:
                print(result)
                total_current_results.append(result)
                already_printed.add(result)

    # If Database Limit Set and Database Length is Greater Than or Equal to
    # Database Limit Times One Thousand
    if args.limitdatabase and len(database_results) >= args.limitdatabase * 1000:
        # Take the timestamp once so the printed name is the file created
        backup_name = time.strftime('%H:%M:%S-') + database_name
        # Backup Previous Database
        os.rename('./' + database_name, './' + backup_name)
        # Set Exceed Database to True
        exceeded_database = True
        print("\nMaximum Database Size Exceeded.." +
              "\nBacking up Database: " + backup_name +
              '\nTOTAL NUMBER OF RESULTS: ' + str(len(database_results)))

    return exceeded_database, previous_results, total_current_results