#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Ziwen [ZW] is the main active component of u/translator-BOT, servicing r/translator and other communities.
Ziwen posts comments and sends messages, and it moderates and keeps r/translator organized. It also provides
community members with useful reference information and enforces the community's formatting guidelines.
"""
import calendar
import csv  # For reading the script/language code reference file.
import datetime
import re  # Regular expressions, used for parsing titles and flair text.
import sqlite3  # For processing and accessing the databases.
import sys  # For reading start-up command-line arguments.
import time
import traceback  # For documenting errors that are encountered.
import googlesearch
import jieba # Segmenter for Mandarin Chinese.
import MeCab # Advanced segmenter for Japanese.
import pafy # Gets YouTube video length.
import praw # Simple interface to the Reddit API that also handles rate limiting of requests.
import prawcore  # The base module of praw, used for error logging.
import requests
import romkan # Needed for automatic Japanese romaji conversion.
import tinysegmenter # Basic segmenter for Japanese; not used on Windows.
import youtube_dl # Needed for some exception logging, also used by pafy.
from bs4 import BeautifulSoup as Bs
from lxml import html
from mafan import simplify, tradify
from wiktionaryparser import WiktionaryParser
from _languages import *
from _config import *
from _responses import *
from Data import _ko_romanizer
"""
UNIVERSAL VARIABLES
These variables (all denoted by UPPERCASE names) are used by many functions in Ziwen. They are important
because they define much of the bot's basic behavior.
"""
BOT_NAME = "Ziwen"
VERSION_NUMBER = "1.8.11"
USER_AGENT = (
"{} {}, a notifications messenger, general commands monitor, and moderator for r/translator. "
"Written and maintained by u/kungming2.".format(BOT_NAME, VERSION_NUMBER)
)
SUBREDDIT = "translator"
TESTING_MODE = False
# This is how many posts Ziwen will retrieve all at once. PRAW can download 100 at a time.
MAXPOSTS = 100
# This is how many seconds Ziwen will wait between cycles. The bot is completely inactive during this time.
WAIT = 30
# After this many cycles, the bot will clean its database, keeping only the latest (CLEANCYCLES * MAXPOSTS) items.
CLEANCYCLES = 90
# How long do we allow people to `!claim` a post? This is defined in seconds (28,800 seconds, or eight hours).
CLAIM_PERIOD = 28800
# A boolean that enables the bot to send messages. Used for testing.
MESSAGES_OKAY = True
# A soft limit on the number of notifications an individual will receive in a month *per language*.
NOTIFICATIONS_LIMIT = 100
"""KEYWORDS LISTS"""
# These are the commands on r/translator.
KEYWORDS = [
"!page:",
"`",
"!missing",
"!translated",
"!id:",
"!set:",
"!note:",
"!reference:",
"!search:",
"!doublecheck",
"!identify:",
"!translate",
"!translator",
"!delete",
"!claim",
"!reset",
"!long",
"!restore",
]
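# Note (an illustrative aside, not part of the bot's logic): later functions both scan comment
# text for these keywords and index into this list positionally, e.g. KEYWORDS[3] is
# "!translated" and KEYWORDS[14] is "!claim". The general detection pattern looks like:
#     contains_command = any(keyword in comment_body for keyword in KEYWORDS)
# (`comment_body` is a placeholder name for demonstration only.)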
# These are the words that count as a 'short thanks' from the OP.
# If the OP's comment includes one of these, the bot won't message them asking them to thank the translator.
THANKS_KEYWORDS = [
"thank",
"thanks",
"tyvm",
"tysm",
"thx",
"danke",
"arigato",
"gracias",
"appreciate",
"solved",
]
# These are keywords that, if included with `!translated`, will give credit to the parent commenter.
VERIFYING_KEYWORDS = [
"concur",
"agree",
"verify",
"verified",
"approve",
"is correct",
"is right",
"well done",
"well-done",
"good job",
"marking",
"good work",
]
# A cache for language multipliers, generated anew each time the bot runs.
# It allows us to access the wiki less often and speeds up the process.
CACHED_MULTIPLIERS = {}
"""
CONNECTIONS TO REDDIT & SQL DATABASES
Ziwen relies on several SQLite3 files to store its data and uses PRAW to connect to Reddit's API.
"""
logger.info("[ZW] Startup: Accessing SQL databases...")
# This connects to the local cache used for detecting edits and the multiplier cache for points.
conn_cache = sqlite3.connect(FILE_ADDRESS_CACHE)
cursor_cache = conn_cache.cursor()
# This connects to the main database, including notifications, points, and past processed data.
conn_main = sqlite3.connect(FILE_ADDRESS_MAIN)
cursor_main = conn_main.cursor()
# This connects to the database for Ajos, objects that the bot generates for posts.
conn_ajo = sqlite3.connect(FILE_ADDRESS_AJO_DB)
cursor_ajo = conn_ajo.cursor()
if len(sys.argv) > 1: # This is a new startup with additional parameters for modes.
specific_mode = sys.argv[1].lower()
if specific_mode == "test":
TESTING_MODE = True
SUBREDDIT = "trntest"
MESSAGES_OKAY = False
logger.info(
"[ZW] Startup: Starting up in TESTING MODE for r/{}...".format(SUBREDDIT)
)
# Connecting to the Reddit API via OAuth.
logger.info("[ZW] Startup: Logging in as u/{}...".format(USERNAME))
reddit = praw.Reddit(
client_id=ZIWEN_APP_ID,
client_secret=ZIWEN_APP_SECRET,
password=PASSWORD,
user_agent=USER_AGENT,
username=USERNAME,
)
r = reddit.subreddit(SUBREDDIT)
logger.info(
"[ZW] Startup: Initializing {} {} for r/{} with languages module {}.".format(
BOT_NAME, VERSION_NUMBER, SUBREDDIT, VERSION_NUMBER_LANGUAGES
)
)
"""
MAINTENANCE FUNCTIONS
These functions are run at Ziwen's startup and also periodically in order to refresh the data they rely on. Most of them
fetch data from r/translator itself or r/translatorBOT for internal variables.
Maintenance functions are all prefixed with `maintenance` in their name.
"""
def maintenance_template_retriever():
"""
Function that retrieves the current flairs available on the subreddit and returns a dictionary.
Dictionary is keyed by the old css_class, with the long-form template ID as a value per key.
Example: 'cs': XXXXXXXX
:return new_template_ids: A dictionary containing all the templates on r/translator.
:return: An empty dictionary if it cannot find the templates for some reason.
"""
new_template_ids = {}
# Access the templates on the subreddit.
for template in r.flair.link_templates:
css_associated_code = template["css_class"]
new_template_ids[css_associated_code] = template["id"]
# Return a dictionary, if there's data, otherwise return an empty dictionary.
if len(new_template_ids.keys()) != 0:
return new_template_ids
else:
return {}
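# Illustrative sketch only (this helper is not called anywhere in Ziwen): it shows how the
# dictionary returned by maintenance_template_retriever() could be used to apply a flair
# template to a submission. The helper's name, its `css_class` parameter, and the use of
# PRAW's `submission.flair.select()` here are assumptions for demonstration purposes.
def _example_apply_flair_template(submission, css_class):
    """Hypothetical helper: apply the saved template that matches a post's CSS class."""
    template_ids = maintenance_template_retriever()  # e.g. {'cs': '<template id>', ...}
    if css_class in template_ids:  # We have a template ID stored for this CSS class.
        submission.flair.select(template_ids[css_class])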
def maintenance_most_recent():
"""
A function that grabs the usernames of people who have submitted to r/translator in the last 24 hours.
Another function can check against this to make sure people aren't submitting too many.
    :return most_recent: A list of usernames that have recently submitted to r/translator; duplicates may be included.
"""
# Define the time parameters (24 hours earlier from present)
most_recent = []
current_vaqt = int(time.time())
current_vaqt_day_ago = current_vaqt - 86400
# 100 should be sufficient for the last day, assuming a monthly total of 3000 posts.
posts = []
posts += list(r.new(limit=100))
# Process through them - we really only care about the username and the time.
for post in posts:
ocreated = int(post.created_utc) # Unix time when this post was created.
try:
oauthor = post.author.name
except AttributeError:
# Author is deleted. We don't care about this post.
continue
# If the time of the post is after our limit, add it to our list.
if ocreated > current_vaqt_day_ago:
if oauthor != "translator-BOT":
most_recent.append(oauthor)
# Return the list
return most_recent
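# Illustrative sketch only (not called by the bot): the kind of rate check described in the
# docstring above, using the duplicate-containing list that maintenance_most_recent() returns.
# The helper's name and the threshold of 5 are arbitrary example values.
def _example_has_submitted_too_many(username, threshold=5):
    """Hypothetical helper: True if a user has submitted more than `threshold` posts in 24 hours."""
    recent_authors = maintenance_most_recent()
    return recent_authors.count(username) > threshold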
def maintenance_get_verified_thread():
"""
Function to quickly get the Reddit ID of the latest verification thread on startup.
This way, the ID of the thread does not need to be hardcoded into Ziwen.
:return verification_id: The Reddit ID of the newest verification thread as a string.
"""
verification_id = ""
# Search for the latest verification thread.
search_term = "title:verified AND flair:meta"
# Note that even in testing ('trntest') we will still search r/translator for the thread.
search_results = reddit.subreddit("translator").search(
search_term, time_filter="year", sort="new", limit=1
)
# Iterate over the results to get the ID.
for post in search_results:
verification_id = post.id
return verification_id
def maintenance_blacklist_checker():
"""
A start-up function that runs once and gets blacklisted usernames from the wiki of r/translatorBOT.
Blacklisted users are those who have abused the subreddit functions on r/translator but are not banned.
This is an anti-abuse system, and it also disallows them from crossposting with Ziwen Streamer.
:return blacklist_usernames: A list of usernames on the blacklist, all in lowercase.
"""
# Retrieve the page.
blacklist_page = reddit.subreddit("translatorBOT").wiki["blacklist"]
overall_page_content = str(blacklist_page.content_md) # Get the page's content.
usernames_raw = overall_page_content.split("####")[1]
usernames_raw = usernames_raw.split("\n")[2].strip() # Get just the usernames.
# Convert the usernames into a list.
blacklist_usernames = usernames_raw.split(", ")
# Convert the usernames to lowercase.
blacklist_usernames = [item.lower() for item in blacklist_usernames]
    # Exclude AutoModerator from the blacklist, if present.
    if "automoderator" in blacklist_usernames:
        blacklist_usernames.remove("automoderator")
return blacklist_usernames
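# Usage note (illustrative): the returned list is all-lowercase, so callers should lowercase a
# username before checking it, e.g. `if post.author.name.lower() in blacklist_usernames: ...`,
# where `blacklist_usernames` is the list this function returns.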
def maintenance_database_processed_cleaner():
"""
Function that cleans up the database of processed comments, but not posts (yet).
:return: Nothing.
"""
pruning_command = "DELETE FROM oldcomments WHERE id NOT IN (SELECT id FROM oldcomments ORDER BY id DESC LIMIT ?)"
cursor_main.execute(pruning_command, [MAXPOSTS * 10])
conn_main.commit()
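# Illustrative sketch only (not called by the bot): shows how the `oldcomments` table pruned by
# the cleaner above could be queried to see whether a comment was already processed. Based on
# the pruning query, it assumes the table's `id` column stores processed comment IDs.
def _example_is_comment_processed(comment_id):
    """Hypothetical helper: check the processed-comments cache for a given comment ID."""
    cursor_main.execute("SELECT id FROM oldcomments WHERE id = ?", (comment_id,))
    return cursor_main.fetchone() is not None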
"""
AJO CLASS/FUNCTIONS
The basic unit of Ziwen's functions is not an individual Reddit post on r/translator per se, but rather, an Ajo.
An Ajo class (from Esperanto aĵo, meaning 'thing') is constructed from a Reddit post but is saved locally and contains
additional information that cannot be stored on Reddit's system.
In Ziwen, changes to a post's language, state, etc. are made first to the Ajo. Then a function within the class can
determine its flair, flair text, and template.
Note: Wenyuan (as of version 3.0) also uses Ajos for its statistics-keeping.
External Ajo-specific/related functions are all prefixed with `ajo` in their name. The Ajo class itself contains several
class functions.
"""
def ajo_defined_multiple_flair_assessor(flairtext):
"""
A routine that evaluates a defined multiple flair text and its statuses as a dictionary.
It can make sense of the symbols that are associated with various states of a post.
:param flairtext: The flair text of a defined multiple post. (e.g. `Multiple Languages [CS, DE✔, HU✓, IT, NL✔]`)
    :return final_language_codes: A dictionary keyed by language code, with each language's state (translated, claimed, etc.) as the value.
"""
final_language_codes = {}
flairtext = flairtext.lower()
languages_list = flairtext.split(", ")
for language in languages_list:
# Get just the language code.
language_code = " ".join(re.findall("[a-zA-Z]+", language))
if len(language_code) != len(
language
): # There's a difference - maybe a symbol in DEFINED_MULTIPLE_LEGEND
for symbol in DEFINED_MULTIPLE_LEGEND:
if symbol in language:
final_language_codes[language_code] = DEFINED_MULTIPLE_LEGEND[
symbol
]
else: # No difference, must be untranslated.
final_language_codes[language_code] = "untranslated"
return final_language_codes
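# Illustrative example (a sketch; the exact status strings for the symbols come from
# DEFINED_MULTIPLE_LEGEND, defined outside this section, so those values are assumptions here,
# while plain codes always map to 'untranslated'):
#
#     ajo_defined_multiple_flair_assessor("CS, DE✔, IT")
#     # -> {'cs': 'untranslated', 'de': <status for ✔>, 'it': 'untranslated'}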
def ajo_defined_multiple_flair_former(flairdict):
"""
Takes a dictionary of defined multiple statuses and returns a string.
To be used with the ajo_defined_multiple_flair_assessor() function above.
:param flairdict: A dictionary keyed by language and their respective states.
:return output_text: A string for use in the flair text. (e.g. `Multiple Languages [CS, DE✔, HU✓, IT, NL✔]`)
"""
output_text = []
for key, value in flairdict.items(): # Iterate over each item in the dictionary.
# Try to get the ISO 639-1 if possible
language_code = iso639_3_to_iso639_1(key)
if language_code is None: # No ISO 639-1 code
language_code = key
status = value
symbol = ""
        for key2, value2 in DEFINED_MULTIPLE_LEGEND.items():
            if value2 == status:
                symbol = key2
                break  # Stop at the first symbol that matches this status.
format_type = "{}{}".format(language_code.upper(), symbol)
output_text.append(format_type)
output_text = list(sorted(output_text)) # Alphabetize
output_text = ", ".join(output_text) # Create a string.
output_text = "[{}]".format(output_text)
return output_text
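# Illustrative round trip with the assessor above (a sketch; the symbols depend on
# DEFINED_MULTIPLE_LEGEND):
#
#     statuses = ajo_defined_multiple_flair_assessor("cs, de✔, it")
#     ajo_defined_multiple_flair_former(statuses)  # -> "[CS, DE✔, IT]"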
def ajo_defined_multiple_comment_parser(pbody, language_names_list):
"""
Takes a comment and a list of languages and looks for commands and language names.
This allows for defined multiple posts to have separate statuses for each language.
We don't keep English though.
:param pbody: The text of a comment on a defined multiple post we're searching for.
:param language_names_list: The languages defined in the post (e.g. [CS, DE✔, HU✓, IT, NL✔])
    :return: None if none are found, otherwise a tuple of the detected language names (as a list) and their shared status.
"""
detected_status = None
status_keywords = {
KEYWORDS[2]: "missing",
KEYWORDS[3]: "translated",
KEYWORDS[9]: "doublecheck",
KEYWORDS[14]: "inprogress",
}
# Look for language names.
detected_languages = language_mention_search(pbody)
# Remove English if detected.
if detected_languages is not None and "English" in detected_languages:
detected_languages.remove("English")
if detected_languages is None or len(detected_languages) == 0:
return None
    # We only want to keep the languages defined in the post; iterate over a copy since we remove items.
    for language in list(detected_languages):
        if language not in language_names_list:
            detected_languages.remove(language)
# If there are none left then we return None
if len(detected_languages) == 0:
return None
for keyword in status_keywords.keys():
if keyword in pbody:
detected_status = status_keywords[keyword]
if detected_status is not None:
return detected_languages, detected_status
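# Illustrative example (a sketch; language_mention_search() is defined elsewhere in this file):
# a comment reading "The German text is fine. !translated" on a post defined for
# ['German', 'French'] returns (['German'], 'translated'). English mentions and languages not
# defined on the post are ignored.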
def ajo_retrieve_script_code(script_name):
"""
    This function takes the name of a script and outputs its ISO 15924 code and the name as a tuple if valid.
    :param script_name: The *name* of a script (e.g. Siddham, Nastaliq, etc.).
    :return: None if the script name is invalid, otherwise a tuple with the ISO 15924 code and the script's name.
"""
codes_list = []
names_list = []
csv_file = csv.reader(
open(FILE_ADDRESS_ISO_ALL, "rt", encoding="utf-8"), delimiter=","
)
for row in csv_file:
        if len(row[0]) == 4:  # This is a script code (the other codes are three characters long).
codes_list.append(row[0])
names_list.append(
row[2:][0]
) # It is normally returned as a list, so we need to convert into a string.
    if script_name in names_list:  # The name is in the list of script names.
item_index = names_list.index(script_name)
item_code = codes_list[item_index]
item_code = str(item_code)
return item_code, script_name
else:
return None
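# Illustrative example (a sketch; the actual result depends on the contents of FILE_ADDRESS_ISO_ALL):
#
#     ajo_retrieve_script_code("Siddham")  # -> ("Sidd", "Siddham") if that script is listed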
class Ajo:
"""
    An equivalent of a post on r/translator. Used as an object for Ziwen and Wenyuan to work with for
consistency with languages and to store extra data.
The process is: Submission > Ajo (changes made to it) > Ajo.update().
After a submission has been turned into an Ajo, Ziwen will only work with the Ajo unless it has to update Reddit's
flair.
Attributes:
id: A Reddit submission that forms the base of this class.
created_utc: The Unix time that the item was created.
author: The Reddit username of the creator. [deleted] if not found.
author_messaged: A boolean that marks whether or not the creator has been messaged that their post has been
translated.
type: single, multiple
        country_code: The ISO 3166-1 alpha-2 code of a country associated with the language. None by default.
language_name: The English name of the post's language, rendered as a string
(Note: Unknown, Nonlanguage, and Conlang posts, etc. count as a language_name)
language_code_1: The ISO 639-1 code of a post's language, rendered as a string. None if non-existent.
language_code_3: The ISO 639-3 code of a post's language, rendered as a string.
language_history: The different names the post has been classified as, stored as a list (in sequence)
status: The current situation of the post. untranslated, translated, needs review, in progress, or missing.
        title: The title of the post, minus the language tag part. Defaults to the regular title if it's not determinable.
title_original: The exact Reddit title of the post.
script_name: The type of script it's classified as (None normally)
script_code: Corresponding code
is_supported: Boolean of whether this is a supported CSS class or not.
is_bot_crosspost: Is it a crosspost from u/translator-BOT?
is_identified: Is it a changed class?
is_long: Is it a long post?
is_script: Is it an Unknown post whose script has been identified?
original_source_language_name: The ORIGINAL source language(s) it was classified as.
original_target_language_name: The ORIGINAL target language(s) it was classified as.
        direction: Whether the submission is to English, from English, both, or neither.
        output_oflair_css: The CSS class that it should be flaired as.
        output_oflair_text: The text that accompanies it.
        parent_crosspost: If it's a crosspost, the ID of the original post.
time_delta: The time between the initial submission time and it being marked. This is a dictionary.
Example of an output for flair is German (Identified/Script) (Long)
"""
# noinspection PyUnboundLocalVariable
def __init__(
self, reddit_submission
): # This takes a Reddit Submission object and generates info from it.
if type(reddit_submission) is dict: # Loaded from a file?
logger.debug("[ZW] Ajo: Loaded Ajo from local database.")
for key in reddit_submission:
setattr(self, key, reddit_submission[key])
else: # This is loaded from reddit.
logger.debug("[ZW] Ajo: Getting Ajo from Reddit.")
self.id = reddit_submission.id # The base Reddit submission ID.
self.created_utc = int(reddit_submission.created_utc)
# Create some empty variables that can be used later.
self.recorded_translators = []
self.notified = []
self.time_delta = {}
self.author_messaged = False
# try:
title_data = title_format(reddit_submission.title)
try: # Check if user is deleted
self.author = reddit_submission.author.name
except AttributeError:
                # The post author is deleted.
self.author = "[deleted]"
if reddit_submission.link_flair_css_class in ["multiple", "app"]:
self.type = "multiple"
else:
self.type = "single"
# oflair_text is an internal variable used to mimic the linkflair text.
if reddit_submission.link_flair_text is None: # There is no linkflair text.
oflair_text = "Generic"
self.is_identified = (
self.is_long
) = self.is_script = self.is_bot_crosspost = self.is_supported = False
else:
if "(Long)" in reddit_submission.link_flair_text:
self.is_long = True
# oflair_text = reddit_submission.link_flair_text.split("(")[0].strip()
else:
self.is_long = False
if (
reddit_submission.link_flair_css_class == "unknown"
): # Check to see if there is a script classified.
if "(Script)" in reddit_submission.link_flair_text:
self.is_script = True
self.script_name = (
oflair_text
) = reddit_submission.link_flair_text.split("(")[0].strip()
self.script_code = ajo_retrieve_script_code(self.script_name)[0]
self.is_identified = False
else:
self.is_script = False
self.is_identified = False
self.script_name = self.script_code = None
oflair_text = "Unknown"
else:
if "(Identified)" in reddit_submission.link_flair_text:
self.is_identified = True
oflair_text = reddit_submission.link_flair_text.split("(")[
0
].strip()
else:
self.is_identified = False
if "(" in reddit_submission.link_flair_text: # Contains (Long)
oflair_text = reddit_submission.link_flair_text.split("(")[
0
].strip()
else:
oflair_text = reddit_submission.link_flair_text
if title_data is not None:
self.direction = title_data[8]
if len(title_data[0]) == 1:
# The source language data is converted into a list. If it's just one, let's make it a string.
self.original_source_language_name = title_data[0][
0
] # Take the only item
else:
self.original_source_language_name = title_data[0]
if len(title_data[1]) == 1:
# The target language data is converted into a list. If it's just one, let's make it a string.
self.original_target_language_name = title_data[1][
0
] # Take the only item
else:
self.original_target_language_name = title_data[1]
if len(title_data[4]) != 0: # Were we able to determine a title?
self.title = title_data[4]
self.title_original = reddit_submission.title
else:
self.title = self.title_original = reddit_submission.title
if (
"{" in reddit_submission.title and "}" in reddit_submission.title
): # likely contains a country name
country_suffix_name = re.search(r"{(\D+)}", reddit_submission.title)
country_suffix_name = country_suffix_name.group(
1
) # Get the Country name only
self.country_code = country_converter(country_suffix_name)[
0
] # Get the code (e.g. CH for Swiss)
elif (
title_data[7] is not None and len(title_data[7]) <= 6
): # There is included code from title routine
country_suffix = title_data[7].split("-", 1)[1]
self.country_code = country_suffix
else:
self.country_code = None
if self.type == "single":
if "[" not in oflair_text: # Does not have a language tag. e.g., [DE]
if (
"{" in oflair_text
): # Has a country tag in the flair, so let's take that out.
country_suffix_name = re.search(r"{(\D+)}", oflair_text)
country_suffix_name = country_suffix_name.group(
1
) # Get the Country name only
self.country_code = country_converter(country_suffix_name)[0]
# Now we want to take out the country from the title.
title_first = oflair_text.split("{", 1)[0].strip()
title_second = oflair_text.split("}", 1)[1]
oflair_text = title_first + title_second
converter_data = converter(oflair_text)
self.language_history = [] # Create an empty list.
if (
reddit_submission.link_flair_css_class != "unknown"
): # Regular thing
self.language_name = converter_data[1]
self.language_history.append(converter_data[1])
else:
self.language_name = "Unknown"
self.language_history.append("Unknown")
if len(converter_data[0]) == 2:
self.language_code_1 = converter_data[0]
else:
self.language_code_3 = converter_data[0]
self.is_supported = converter_data[2]
if len(converter_data[0]) == 2: # Find the matching ISO 639-3 code.
self.language_code_3 = MAIN_LANGUAGES[converter_data[0]][
"language_code_3"
]
else:
self.language_code_1 = None
else: # Does have a language tag.
language_tag = reddit_submission.link_flair_text.split("[")[1][
:-1
].lower() # Get the characters
if (
language_tag != "?" and language_tag != "--"
): # Non-generic versions
converter_data = converter(language_tag)
self.language_name = converter_data[1]
self.is_supported = converter_data[2]
if len(language_tag) == 2:
self.language_code_1 = language_tag
self.language_code_3 = MAIN_LANGUAGES[language_tag][
"language_code_3"
]
elif len(language_tag) == 3:
self.language_code_1 = None
self.language_code_3 = language_tag
else: # Either a tag for an unknown post or a generic one
if (
language_tag == "?"
): # Unknown post that has still been processed.
self.language_name = "Unknown"
self.language_code_1 = None
self.language_code_3 = "unknown"
self.is_supported = True
elif language_tag == "--": # Generic post
self.language_name = None
self.language_code_1 = None
self.language_code_3 = "generic"
self.is_supported = False
elif (
self.type == "multiple"
): # If it's a multiple type, let's put the language names etc as lists.
self.is_supported = True
self.language_history = []
# Handle DEFINED MULTIPLE
if (
"[" in reddit_submission.link_flair_text
): # There is a list of languages included in the flair
# Return their names from the code.
# test_list_string = "Multiple Languages [DE, FR]"
multiple_languages = []
actual_list = reddit_submission.link_flair_text.split("[")[1][
:-1
] # Get just the codes
actual_list = actual_list.replace(
" ", ""
) # Take out the spaces in the tag.
# Code to replace the special status characters... Not fully sure how it interfaces with the rest
for character in DEFINED_MULTIPLE_LEGEND.keys():
if character in actual_list:
actual_list = actual_list.replace(character, "")
new_code_list = actual_list.split(",") # Convert to a list
for code in new_code_list: # We wanna convert them to list of names
code = code.lower() # Convert to lowercase.
code = "".join(re.findall("[a-zA-Z]+", code))
multiple_languages.append(
converter(code)[1]
) # Append the names of the languages.
else:
multiple_languages = title_data[
6
] # Get the languages that this is for. Will be a list or None.
# Handle REGULAR MULTIPLE
if multiple_languages is None: # This is a catch-all multiple case
if reddit_submission.link_flair_css_class == "multiple":
self.language_code_1 = self.language_code_3 = "multiple"
self.language_name = "Multiple Languages"
self.language_history.append("Multiple Languages")
elif reddit_submission.link_flair_css_class == "app":
self.language_code_1 = self.language_code_3 = "app"
self.language_name = "App"
self.language_history.append("App")
elif multiple_languages is not None:
self.language_code_1 = []
self.language_code_3 = []
self.language_name = []
self.language_history.append("Multiple Languages")
for language in multiple_languages: # Start creating the lists.
self.language_name.append(language)
multi_language_code = converter(language)[0]
if len(multi_language_code) == 2:
self.language_code_1.append(multi_language_code)
self.language_code_3.append(
MAIN_LANGUAGES[converter(language)[0]][
"language_code_3"
]
)
elif len(multi_language_code) == 3:
self.language_code_1.append(None)
self.language_code_3.append(multi_language_code)
if reddit_submission.link_flair_css_class == "translated":
self.status = "translated"
elif reddit_submission.link_flair_css_class == "doublecheck":
self.status = "doublecheck"
elif reddit_submission.link_flair_css_class == "inprogress":
self.status = "inprogress"
elif reddit_submission.link_flair_css_class == "missing":
self.status = "missing"
elif reddit_submission.link_flair_css_class in ["app", "multiple"]:
# It's a generic one.
if isinstance(self.language_code_3, str):
self.status = "untranslated"
elif multiple_languages is not None: # This is a defined multiple
# Construct a status dictionary.(we could also use multiple_languages)
actual_list = reddit_submission.link_flair_text.split("[")[1][
:-1
] # Get just the codes
self.status = ajo_defined_multiple_flair_assessor(
actual_list
) # Pass it to dictionary constructor
else:
self.status = "untranslated"
try:
original_post_id = (
reddit_submission.crosspost_parent
) # Check to see if this is a bot crosspost.
crossposter = reddit_submission.author.name
if crossposter == "translator-BOT":
self.is_bot_crosspost = True
self.parent_crosspost = original_post_id[3:]
else:
self.is_bot_crosspost = False
except AttributeError: # It's not a crosspost.
self.is_bot_crosspost = False
def __eq__(self, other):
"""
Two Ajos are defined as the same if the dictionary representation of their contents match.
:param other: The other Ajo we are comparing against.
:return: A boolean. True if their dictionary contents match, False otherwise.
"""
return self.__dict__ == other.__dict__
def set_status(self, new_status):
"""
Change the status/state of the Ajo - a status like translated, doublecheck, etc.
:param new_status: The new status for the Ajo to have, defined as a string.
"""
self.status = new_status
def set_status_multiple(self, status_language_code, new_status):
"""
Similar to `set_status` but changes the status of a defined multiple post. This function does this by writing
its status as a dictionary instead, keyed by the language.
:param status_language_code: The language code (e.g. `zh`) we want to define the status for.
:param new_status: The new status for that language code.
"""
if isinstance(
self.status, dict
): # Make sure it's something we can actually update
if (
self.status[status_language_code] != "translated"
): # Once something's marked as translated stay there.
self.status[status_language_code] = new_status
else:
pass
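    # Illustrative example (a sketch): if self.status is {'de': 'untranslated', 'fr': 'translated'},
    # then set_status_multiple('de', 'doublecheck') changes it to
    # {'de': 'doublecheck', 'fr': 'translated'}; 'fr' cannot leave the 'translated' state.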
def set_long(self, new_long):
"""
Change the `is_long` boolean of the Ajo, a variable that defines whether it's considered a long post.
Moderators can call this function with the command `!long`.
:param new_long: A boolean on whether the post is considered long. True if it is, False otherwise.
:return:
"""
self.is_long = new_long
def set_author_messaged(self, is_messaged):
"""
Change the `author_messaged` boolean of the Ajo, a variable that notes whether the OP of the post has been
messaged that it's been translated.
:param is_messaged: A boolean on whether the author has been messaged. True if they have, False otherwise.
:return:
"""
        self.author_messaged = is_messaged
def set_country(self, new_country_code):
"""
A function that allows us to change the country code in the Ajo. Country codes are generally optional for Ajos,
but they can be defined to provide more granular detail (e.g. `de-AO`).
:param new_country_code: The country code (as a two-letter ISO 3166-1 alpha-2 code) the Ajo should be set as.
"""
if new_country_code is not None:
new_country_code = new_country_code.upper()
self.country_code = new_country_code
def set_language(self, new_language_code, new_is_identified=False):
"""
This changes the language of the Ajo. It accepts a language code as well as an identification boolean.
The boolean is False by default.
:param new_language_code: The new language code to set the Ajo as.
:param new_is_identified: Whether or not the `is_identified` boolean of the Ajo should be set to True.
For example, `!identify` commands will set `is_identified` to True, but
moderator `!set` commands won't. Ajos with `is_identified` as True will have
"(Identified)" appended to their flair text.
:return:
"""
old_language_name = str(self.language_name)
if new_language_code not in [
"multiple",
"app",
        ]:  # This is just a single type of language.
self.type = "single"
if len(new_language_code) == 2:
self.language_name = converter(new_language_code)[1]
self.language_code_1 = new_language_code
self.language_code_3 = MAIN_LANGUAGES[new_language_code][
"language_code_3"
]
self.is_supported = converter(new_language_code)[2]
elif len(new_language_code) == 3:
self.language_name = converter(new_language_code)[1]
self.language_code_1 = None
self.language_code_3 = new_language_code
# Check to see if this is a supported language.
if new_language_code in MAIN_LANGUAGES:
self.is_supported = MAIN_LANGUAGES[new_language_code]["supported"]
else:
self.is_supported = False
elif new_language_code == "unknown": # Reset everything
self.language_name = "Unknown"
self.language_code_1 = (
self.is_script
) = self.script_code = self.script_name = None
self.language_code_3 = "unknown"
self.is_supported = True
elif new_language_code in ["multiple", "app"]: # For generic multiples (all)
if new_language_code == "multiple":
self.language_name = "Multiple Languages"
self.language_code_1 = self.language_code_3 = "multiple"
self.status = "untranslated"
self.type = "multiple"
elif new_language_code == "app":
self.language_name = "App"
self.language_code_1 = self.language_code_3 = "app"
self.status = "untranslated"
self.type = "multiple"
try:
# We do a check here to make sure we are not including the same thing twice.
# This is to avoid something like ['Unknown', 'Chinese', 'Chinese']
if self.language_history[-1] != self.language_name:
self.language_history.append(
self.language_name
) # Add the new language name to the history.
except (
AttributeError,
IndexError,
): # There was no language_history defined... Let's create it.
self.language_history = [old_language_name, self.language_name]
if (
new_is_identified != self.is_identified
): # There's a change to the identification
self.is_identified = new_is_identified # Update with said change
def set_script(self, new_script_code):
"""
Change the script (ISO 15924) of the Ajo, assuming it is an Unknown post. This will also now reset the
flair to Unknown.
:param new_script_code: A four-letter ISO 15924 code.
:return:
"""
self.language_name = "Unknown"
self.language_code_1 = None
self.language_code_3 = "unknown"
self.is_supported = True
self.is_script = True
self.script_code = new_script_code
self.script_name = lang_code_search(new_script_code, True)[
0
] # Get the name of the script
def set_defined_multiple(self, new_language_codes):
"""
This is a function that sets the language of an Ajo to a defined Multiple one.
Example: Multiple Languages [AR, KM, VI]
:param new_language_codes: A string of language names or codes, where each element is separated by a `+`.
Example: arabic+khmer+vi
:return:
"""
self.type = "multiple"
old_language_name = str(self.language_name)
# Divide into a list.
set_languages_raw = new_language_codes.split("+")
set_languages_raw = sorted(set_languages_raw, key=str.lower)
# Set some default values up.
set_languages_processed_codes = []
self.status = {} # self
self.language_name = [] # self
self.language_code_1 = []
self.language_code_3 = []