-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathdatabase.py
205 lines (168 loc) · 6.14 KB
/
database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
'''
Database access layer.
The project stores its data in MongoDB, which was chosen for its
flexibility in handling large, text-heavy documents.
'''
import copy
import json
import logging
import pprint

import gridfs
import pymongo
from bson.objectid import ObjectId
from pymongo import MongoClient

import syntax
from syntax import *
# Module-wide MongoDB client shared by every Database instance. MongoClient()
# is called without arguments, so the driver's default host/port are used.
# NOTE(review): recent PyMongo versions connect lazily, so a dead server may
# only surface on the first real operation rather than here - confirm against
# the PyMongo version in use.
try:
    client = MongoClient()
except pymongo.errors.ConnectionFailure as e:
    print("Could not connect to MongoDB: %s" % e)
    # Keep the name bound so that importing this module never leaves
    # `client` undefined; Database() will fail when it tries to use it.
    client = None
class Database:
    """Wrapper around the ``3gmdb`` MongoDB database.

    Exposes the collections used by the project as attributes and groups
    the insert / drop / rollback helpers that operate on them. JSON
    snapshots are kept in GridFS (``self.fs``).
    """

    def __init__(self):
        """Bind the collections and the GridFS bucket of ``3gmdb``.

        Uses the module-level ``client``.
        """
        self.db = client['3gmdb']
        self.issues = self.db.issues
        self.laws = self.db.laws
        self.links = self.db.links
        self.topics = self.db.topics
        self.named_entities = self.db.named_entities
        self.archive_links = self.db.archive_links
        self.fs = gridfs.GridFS(self.db)
        self.summaries = self.db.summaries

    @staticmethod
    def _save(collection, document):
        """Insert or replace ``document`` in ``collection``.

        Reproduces the semantics of the removed ``Collection.save``: a
        document carrying an ``_id`` replaces (or creates) the document
        with that id, a document without one is inserted.

        :param collection: collection object offering ``replace_one`` and
            ``insert_one``
        :param document: dict to store
        """
        if '_id' in document:
            collection.replace_one(
                {'_id': document['_id']}, document, upsert=True)
        else:
            collection.insert_one(document)

    def insert_issue_to_db(self, issue):
        """Serialize an issue and insert it into the ``issues`` collection.

        :param issue: object providing ``detect_signatories()``,
            ``issue_date``, ``issue_number``, ``articles`` (a mapping),
            ``get_extracts(article)``, ``get_non_extracts(article)`` and,
            after detection, ``signatories``
        """
        issue.detect_signatories()
        serializable = {
            'issue_date': str(issue.issue_date),
            'issue_number': issue.issue_number,
            'articles': issue.articles,
            'extracts': [
                (article, list(issue.get_extracts(article)))
                for article in issue.articles
            ],
            'non_extracts': [
                (article, list(issue.get_non_extracts(article)))
                for article in issue.articles
            ],
            'signatories': [
                signatory.__dict__ for signatory in issue.signatories
            ],
        }
        self.issues.insert_one(serializable)

    def print_laws(self):
        """Pretty-print every document of the ``laws`` collection."""
        for document in self.laws.find({}):
            pprint.pprint(document)

    def drop_laws(self):
        """Drop the ``laws`` collection."""
        self.db.drop_collection('laws')

    def drop_archive_links(self):
        """Drop the ``archive_links`` collection."""
        self.db.drop_collection('archive_links')

    def drop_issues(self):
        """Drop the ``issues`` collection."""
        self.db.drop_collection('issues')

    def push_law_to_db(self, law):
        """Store a law in the ``laws`` collection.

        :param law: object whose ``serialize()`` returns the document to
            store (a LawParser, per the original documentation)
        """
        self._save(self.laws, law.serialize())

    def query_from_tree(self, law, tree, issue_name=None):
        """Apply a query tree to ``law`` and record the result as a version.

        The outcome of ``law.query_from_tree(tree)`` is tagged with
        ``law.version_index`` (and with ``issue_name`` as ``amendee`` when
        given) and appended to the ``versions`` list of the law's document.

        :param law: object providing ``query_from_tree``, ``version_index``
            and ``identifier``
        :param tree: query tree handed to ``law.query_from_tree``
        :param issue_name: name of the amending issue, if any
        """
        print('Querying from tree')
        result = law.query_from_tree(tree)
        result['_version'] = law.version_index
        if issue_name:
            result['amendee'] = issue_name

        # Only a stored law with no version already amended by this issue
        # is reused; otherwise a fresh document is started.
        # NOTE(review): when the law exists but already carries a version
        # with this amendee, the fresh document replaces it and the earlier
        # versions are lost - confirm that this is intended.
        matches = list(self.laws.find(
            {"_id": law.identifier, "versions.amendee": {"$ne": issue_name}}))
        if matches:
            temp = matches[0]
        else:
            # The original also saved this bare document first; the save
            # below overwrites it, so the extra write is skipped.
            temp = {'_id': law.identifier}

        temp.setdefault('versions', []).append(result)
        print(temp)
        self._save(self.laws, temp)

    def insert_links(self, links):
        """Store every link in the ``links`` collection.

        :param links: iterable of objects exposing ``serialize()``
        """
        for link in links:
            self._save(self.links, link.serialize())

    def drop_links(self):
        """Drop the ``links`` collection."""
        self.db.drop_collection('links')

    def drop_topics(self):
        """Drop the ``topics`` collection."""
        self.db.drop_collection('topics')

    def drop_named_entities(self):
        """Drop the ``named_entities`` collection."""
        self.db.drop_collection('named_entities')

    def checkout_laws(self, identifier=None, version=0):
        """Reset a law in the ``laws`` collection to one stored version.

        The GridFS JSON for ``identifier`` is read and the first entry of
        its ``versions`` list whose ``_version`` equals ``version`` becomes
        the only version of the stored law.

        :param identifier: id of the law (also the GridFS file id)
        :param version: version number to check out
        :returns: the document written to ``laws``, or ``None`` when the
            requested version does not exist (nothing is written then)
        """
        snapshot = self.get_json_from_fs(identifier)
        for candidate in snapshot.get('versions', []):
            if int(candidate['_version']) == version:
                selected = {'_id': identifier, 'versions': [candidate]}
                self._save(self.laws, selected)
                return selected
        # Requested version does not exist.
        return None

    def rollback_laws(self, identifier=None):
        """Roll a law back to version 0.

        NOTE(review): the original documentation says that ``None`` rolls
        back everything, but this simply forwards ``identifier`` to
        ``checkout_laws``, which looks up a single GridFS file - confirm.

        :param identifier: id of the law to roll back
        :returns: whatever ``checkout_laws`` returns
        """
        return self.checkout_laws(identifier=identifier, version=0)

    def rollback_links(self, identifier=None, rollback_laws=False):
        """Mark applied links as not applied.

        :param identifier: if ``None`` every link document with an applied
            link is processed, otherwise only the document with this id
        :param rollback_laws: if true, also call ``rollback_laws`` with the
            same identifier
        :returns: the last link document that was updated, or ``None`` when
            no document matched
        """
        # 'εφαρμοσμένος' means "applied", 'μη εφαρμοσμένος' "not applied".
        query = {'actual_links.status': 'εφαρμοσμένος'}
        if identifier is not None:
            query['_id'] = identifier

        last_updated = None
        for stored in self.links.find(query):
            last_updated = copy.copy(stored)
            for link in last_updated['actual_links']:
                link['status'] = 'μη εφαρμοσμένος'
            self._save(self.links, last_updated)

        if rollback_laws:
            self.rollback_laws(identifier=identifier)

        return last_updated

    def rollback_all(self):
        """Roll back every link and then the laws.

        NOTE(review): this passes ``identifier=None`` down to
        ``checkout_laws``, which reads the GridFS file whose id is ``None``
        - verify that this path works as intended.
        """
        self.rollback_links(identifier=None, rollback_laws=True)

    def put_json_to_fs(self, _id, _json):
        """Store a JSON-serializable object in GridFS as UTF-8 bytes.

        :param _id: id to give the GridFS file
        :param _json: object to serialize
        :returns: the value returned by ``GridFS.put``
        """
        dump = json.dumps(_json, ensure_ascii=False).encode('utf-8')
        return self.fs.put(dump, _id=_id)

    def save_json_to_fs(self, _id, _json):
        """Store a JSON object in GridFS, replacing any file with that id.

        :param _id: id of the GridFS file
        :param _json: object to serialize
        """
        if self.fs.exists(_id):
            self.fs.delete(_id)
        else:
            logging.info('File nonexistent')
        self.put_json_to_fs(_id, _json)

    def get_json_from_fs(self, _id=None):
        """Load and decode the JSON stored in GridFS under ``_id``.

        No check is made for a missing file: if ``find_one`` yields
        ``None`` the ``read()`` call fails.

        :param _id: id of the GridFS file
        :returns: the decoded JSON object
        """
        dump = self.fs.find_one({'_id': _id})
        return json.loads(dump.read().decode('utf-8'))

    def drop_fs(self):
        """Drop the ``fs.files`` and ``fs.chunks`` GridFS collections."""
        self.db.drop_collection('fs.files')
        self.db.drop_collection('fs.chunks')

    def drop_summaries(self):
        """Drop the ``summaries`` collection."""
        self.db.drop_collection('summaries')