{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"GlotWeb","owner":"cisnlp","isFork":false,"description":"GlotWeb: Web Indexing for Low-Resource Languages -- under construction.","allTopics":["multilingual","dataset","glot","low-resource-languages","news-dataset","awsome-list"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":5,"forksCount":0,"license":"Creative Commons Zero v1.0 Universal","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-23T13:06:52.931Z"}},{"type":"Public","name":"GlotLID","owner":"cisnlp","isFork":false,"description":"GlotLID: Language Identification with Support for More Than 2000 Labels -- EMNLP 2023","allTopics":["language-detection","multlingual","language-detector","language-recognition","glot","lid","language-identification","language-classification","language-identification-toolkit","low-resource-languages","language-detection-library","language-identifier","language-detection-lib","langid","low-resource-nlp"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":84,"forksCount":7,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-05T14:21:12.765Z"}},{"type":"Public","name":"Transliteration-PPA","owner":"cisnlp","isFork":false,"description":"Breaking the Script Barrier in Multilingual Pre-Trained Language Models with Transliteration-Based Post-Training Alignment","allTopics":["multilingual","transliteration","cross-lingual-transfer","script-barrier"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-03T21:13:48.373Z"}},{"type":"Public","name":"MaskLID","owner":"cisnlp","isFork":false,"description":"MaskLID: Code-Switching Language Identification through Iterative Masking -- ACL 2024","allTopics":["language-identification","code-mixing","code-switching","language-identification-toolkit","code-switch","language-identifier"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-11T11:58:44.052Z"}},{"type":"Public","name":"GlotScript","owner":"cisnlp","isFork":false,"description":"GlotScript: A Resource and Tool for Low Resource Writing System Identification -- LREC 2024","allTopics":["unicode","script","language-detection","iso15924","alphabet","unicode-characters","writing-systems","writing-system","glot","language-identification","unicodedata","script-detection"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-07T12:56:24.890Z"}},{"type":"Public","name":"Taxi1500","owner":"cisnlp","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":5,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-31T10:18:33.665Z"}},{"type":"Public","name":"TransMI","owner":"cisnlp","isFork":false,"description":"TransMI: A Framework to Create Strong Baselines from Multilingual Pretrained Language Models for Transliterated Data","allTopics":["transliteration","multilinguality","crosslingual-transfer","tokenizer-modification"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-30T21:19:10.257Z"}},{"type":"Public","name":"TransliCo","owner":"cisnlp","isFork":false,"description":"TransliCo: A Contrastive Learning Framework to Address the Script Barrier in Multilingual Pretrained Language Models","allTopics":["transliteration","multilinguality","crosslingual-transfer","script-barrier"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-23T18:32:09.376Z"}},{"type":"Public","name":"XAMPLER","owner":"cisnlp","isFork":false,"description":"XAMPLER: Learning to Retrieve Cross-Lingual In-Context Examples","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-09T06:48:10.178Z"}},{"type":"Public","name":"Glot500","owner":"cisnlp","isFork":false,"description":"Glot500: Scaling Multilingual Corpora and Language Models to 500 Languages -- ACL 2023","allTopics":["multilingual","nlp","natural-language-processing","acl","dataset","glot","xlm","multilingual-models","xlm-r","multilingual-nlp","glot500"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":96,"forksCount":3,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-20T23:47:45.126Z"}},{"type":"Public","name":"mPLM-Sim","owner":"cisnlp","isFork":false,"description":"mPLM-Sim: Better Cross-Lingual Similarity and Transfer in Multilingual Pretrained Language Models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":10,"forksCount":0,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-19T13:28:03.474Z"}},{"type":"Public","name":"ofa","owner":"cisnlp","isFork":false,"description":"A Framework aims to wisely initialize unseen subword embeddings in PLMs for efficient large-scale continued pretraining","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":11,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-26T10:06:01.269Z"}},{"type":"Public","name":"simalign","owner":"cisnlp","isFork":false,"description":"Obtain Word Alignments using Pretrained Language Models (e.g., mBERT)","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":345,"forksCount":47,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-07T21:21:45.690Z"}},{"type":"Public","name":"parcoure","owner":"cisnlp","isFork":false,"description":"ParCourE - Parallel Corpus Explorer","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-12-27T20:27:02.266Z"}},{"type":"Public","name":"graph-align","owner":"cisnlp","isFork":false,"description":"code for EMNLP graph align paper","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":9,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-11-05T15:45:40.862Z"}},{"type":"Public","name":"bias-in-nlp","owner":"cisnlp","isFork":false,"description":"Literature overview: gender bias in natural language processing","allTopics":["nlp","bias","nlp-machine-learning","gender-bias"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":10,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-01-26T09:59:47.456Z"}},{"type":"Public","name":"semi-markov-crf","owner":"cisnlp","isFork":false,"description":"Code for paper \"Neural Semi-Markov Conditional Random Fields for Robust Character-Based Part-of-Speech Tagging\"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":17,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-05-31T18:22:16.216Z"}}],"repositoryCount":17,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"cisnlp repositories"}