This repository was archived by the owner on Mar 1, 2021. It is now read-only.

Commit 563e97c

Merge branch 'master' into 173-create-api-for-face-search

2 parents: 6a629c3 + fc0002d

72 files changed: +895 -474 lines. Large commits have some content hidden by default, so only a subset of the changed files is shown below.


.dockerignore (+1 -1)

@@ -4,7 +4,7 @@
 
 # git stuff
 .gitignore
-README.md
+*.md
 
 # DOCKER stuff
 docker-compose.yml

Makefile (+18 -2)

@@ -1,3 +1,4 @@
+# API
 gen-server:
 	protoc --go_out=plugins=grpc:. api/proto/usersearch.proto
 
@@ -9,8 +10,23 @@ gen-client:
 gen-faces:
 	protoc --go_out=plugins=grpc:. faces/proto/recognizer.proto
 
-run:
-	docker-compose up -d my-kafka postgres connect
+# INSTAGRAM
+
+run-instagram:
+	docker-compose up -d zookeeper my-kafka postgres connect minio neo4j
+	docker-compose up -d --build es-with-plugin
 	sleep 5
+	docker-compose up --build migrate-postgres
 	docker-compose up -d --build
 	docker-compose logs -f
+
+
+# TWITTER
+
+TWITTER_COMPOSE_FILE:=twitter-compose.yml
+
+run-twitter:
+	docker-compose -f $(TWITTER_COMPOSE_FILE) up -d my-kafka postgres connect
+	sleep 5
+	docker-compose -f $(TWITTER_COMPOSE_FILE) up -d --build
+	docker-compose -f $(TWITTER_COMPOSE_FILE) logs -f

README.md (+60 -48)

@@ -1,73 +1,85 @@
-# SMAG - mvp
-> social media graph abusal
+# Social Record
+
+> Distributed scraping and analysis pipeline for a range of social media platforms
+
+**Table of content**
 
 - [About](#about)
 - [Architectural overview](#architectural-overview)
-- [Api](#api)
-- [Postgres DB](#postgres-db)
-- [Requirements](#requirements)
+- [Further reading](#further-reading)
+- [Detailed documentation](#detailed-documentation)
+- [Wanna contribute?](#wanna-contribute)
+- [List of contributors](#list-of-contributors)
+- [Deployment](#deployment)
 - [Getting started](#getting-started)
-- [scraper in docker](#scraper-in-docker)
-- [scraper locally](#scraper-locally)
-- [Postgres change stream](#postgres-change-stream)
+- [Requirements](#requirements)
+- [Preparation](#preparation)
+- [Scraper](#scraper)
 
 ## About
-The goal of this project is to raise awareness about data privacy. The mean to do so is a tool to scrape, combine and analyze public social media data.
+
+The goal of this project is to raise awareness about data privacy. The mean to do so is a tool to scrape, combine and analyze public data from multiple social media sources. <br>
 The results will be available via an API, used for some kind of art exhibition.
 
 ## Architectural overview
-You can find a overview about our architecture on this [miro board](https://miro.com/app/board/o9J_kw7a-qM=/)
 
-### Api
-see details [here](api/README.md)
+![](docs/architecture.png)
 
-### Postgres DB
-see details [here](db/README.md)
+You can find an more detailed overview [here](https://drive.google.com/a/code.berlin/file/d/1uE8oTku322-_eN3QGuiM4ayWZiRXfn9F/view?usp=sharing). <br>
+Open it in draw.io and have a look at the different tabs "High level overview", "Distributed Scraper" and "Face Search".
 
-## Requirements
+## Further reading
 
-- go 1.13 _(or go 1.11+ with the env var `GO111MODULEs=on`)_
-- `docker` and `docker-compose` are available and up-to-date
+### Detailed documentation
 
-## Getting started
+| part | docs | contact |
+| :---------- | :----------------------------------------- | :----------------------------------------------- |
+| Api | [`api/README.md`](api/README.md) | [@jo-fr](https://github.com/jo-fr) |
+| Frontend | [`frontend/README.md`](frontend/README.md) | [@lukas-menzel](https://github.com/lukas-menzel) |
+| Postgres DB | [`db/README.md`](db/README.md) | [@alexmorten](https://github.com/alexmorten) |
 
-If this is your first time running this:
+### Wanna contribute?
 
-1. Add `127.0.0.1 my-kafka` and `127.0.0.1 minio` to your `/etc/hosts` file
-2. Choose a user_name as a starting point and run `go run cli/main/main.go <instagram|twitter> <user_name>`
-
-As alternative, you can also add the cli to the docker-compose:
-
-```yaml
-cli:
-  build:
-    context: "."
-    dockerfile: "cli/Dockerfile"
-  command: ["<instagram|twitter>", "<user_name>"]
-  depends_on:
-    - "my-kafka"
-  environment:
-    KAFKA_ADDRESS: "my-kafka:9092"
-```
+If you want to join us raising awareness for data privacy have a look into [`CONTRIBUTING.md`](CONTRIBUTING.md)
 
-### scraper in docker
+### List of contributors
 
-```bash
-$ make run
-```
+- @1Jo1 Josef Grieb
+- @Urhengulas Johann Hemmann
+- @alexmorten Alexander Martin
+- @jo-fr Jonathan Freiberger
+- @m-lukas Lukas Müller
+- @lukas-menzel Lukas Menzel
+- @SpringHawk Martin Zaubitzer
 
-### scraper locally
+### Deployment
 
-Have a look into [`docker-compose.yml`](docker-compose.yml), set the neccessary environment variables and run it with the command from the regarding dockerfile.
+The deployment of this project to kubernetes happens in [codeuniversity/smag-deploy](https://github.com/codeuniversity/smag-deploy) _(this is a private repo!)_
 
-## Postgres change stream
+## Getting started
+
+### Requirements
 
-The debezium connector generates a change stream from all the changes in postgres
+| depency | version |
+| :----------------------------------------------------------- | :----------------------------------------------------------------- |
+| [`go`](https://golang.org/doc/install) | `v1.13` _([go modules](https://blog.golang.org/using-go-modules))_ |
+| [`docker`](https://docs.docker.com/install/) | `v19.x` |
+| [`docker-compose`](https://docs.docker.com/compose/install/) | `v1.24.x` |
 
-To read from this stream you can
+### Preparation
 
-- get [kt](https://github.com/fgeller/kt)
-- inspect the topic list in kafka `kt topic`, all topic starting with `postgres` are streams from individual tables
-- consume a topic with, for example `kt consume --topic postgres.public.users`
+If this is your first time running this:
+
+1. Add `127.0.0.1 my-kafka` and `127.0.0.1 minio` to your `/etc/hosts` file
+2. Choose a `<user_name>` for your platform of choice `<instagram|twitter>` as a starting point and run
+```bash
+$ go run cli/main/main.go <instagram|twitter> <user_name>
+```
 
-The messages are quite verbose, since they include their own schema description. The most interesting part is the `value.payload` -> `kt consume --topic postgres.public.users | jq '.value | fromjson | .payload'`
+### Scraper
+
+Run the instagram- or twitter-scraper in docker:
+
+```bash
+$ make run-<platform_name>
+```

api/README.md (+40 -32)

@@ -1,39 +1,47 @@
-# gRPC API
+# SMAG gRPC Web API
 
-- [Usage](#usage)
-- [Functions](#functions)
-- [Testing](#testing)
+## About
 
-## Usage
-- Make sure to `npm install google-protobuf grpc-web`
-- Then import the auto-generated proto files
-```javascript
-import {User, UserName, UserSearchResponse} from "./proto/client/usersearch_pb.js";
-import {UserSearchServiceClient} from "./proto/client/usersearch_grpc_web_pb.js";
+In our project we are using a [gRPC Web](https://grpc.io/docs/) API. For that we are using an [envoy proxy](https://www.envoyproxy.io/docs/envoy/latest/) to be able to connect to the gRPC Server. As our system is not publicly accessible an AWS Account in our Organisation with the appropriate access is required.
+
+## Requirements
 
-var userSearch = new UserSearchServiceClient('http://localhost:8080');
+In order to successfully use our api make sure to have:
 
-var request = new UserName();
-request.setUserName("codeuniversity");
-
-userSearch.getUserWithUsername(request, {},function(err, response) {
-  //...
-});
-- The default address for the database is `localhost`.
-  If you want to change that simply add the enviroment variable `GRPC_POSTGRES_HOST` to the `grpc-server`container
+- a running [kubernetes setup](https://github.com/codeuniversity/smag-deploy/blob/master/README.md) (permssion required)
+- _optional for local testing_: [protoc](http://google.github.io/proto-lens/installing-protoc.html) to generate the protofiles for the frontend
+
+## Usage
 
+To use the production enviroment do the following steps:
+
+1. Get name of envoy proxy `kubectl get pods | grep envoy`
+2. Forward the envoy-pod port with `kubectl port-forward envoy-proxy-deployment-6b89675d5b-d86c4 4000:8080`
+3. To make use of the API in the React Frontend import and run the following:
+```javascript
+import {
+  User,
+  UserNameRequest,
+  UserIdRequest,
+  InstaPostsResponse,
+  UserSearchResponse
+} from "./protofiles/client/usersearch_pb.js";
+import { UserSearchServiceClient } from "./protofiles/client/usersearch_grpc_web_pb.js";
+var userSearch = new UserSearchServiceClient("http://localhost:4000");
+var request = new UserName();
+request.setUserName("codeuniversity");
+userSearch.getUserWithUsername(request, {}, function(err, response) {
+  //example function call...
+});
+```
 
 ## Functions
-- `getUserWithUsername(UserNameRequest) User`
-  > Queries the Database for one specific User
-- `getAllUsersWithUsername(UserNameRequest) UserSearchResponse`
-  > Queries the database for all users that have a similar usenames and returns array of user
-- `getInstaPostssWithUserid(UserIdRequest) InstaPostsResponse`
-  > GetInstaPostsWithUserId takes the User id and returns all Instagram Posts of a User
-- `getTaggedPostsWithUserId(UserIdRequest) InstaPostsResponse`
-  > GetTaggedPostsWithUserId returns all Posts the given User is tagged on
-
-## Testing
-1. `docker-compose up`
-1. initialize the Database with `make init-db`
-1. Then connect with the envoy-proxy via `localhost:4000`
+
+To check the attributes of the proto messages take a look at the protofile [userserach.proto](https://github.com/codeuniversity/smag-mvp/blob/master/api/proto/usersearch.proto)
+
+| **Method** | **Function Name** | **Input Message** | **Return Message** |
+| ---------- | ------------------------ | ----------------- | ------------------ |
+| GET | getUserWithUsername | UserNameRequest | User |
+| GET | getAllUsersLikeUsername | UserNameRequest | UserSearchResponse |
+| GET | getTaggedPostsWithUserId | UserIdRequest | InstaPostsResponse |
+| GET | getInstaPostssWithUserid | UserIdRequest | UserSearchResponse |
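
For context: the table above describes a conventional gRPC service, so the same calls can also be made from a native Go client. The sketch below is hypothetical and only illustrates the call pattern; the generated package import path and the `UserName` field of `UserNameRequest` are assumptions (inferred from the JavaScript `setUserName()` call), and it assumes the gRPC server itself is reachable on `localhost:4000` (e.g. via `kubectl port-forward`).

```go
package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"

	// Assumed import path of the Go code generated from api/proto/usersearch.proto.
	pb "github.com/codeuniversity/smag-mvp/api/proto"
)

func main() {
	// Connect to the (port-forwarded) gRPC server; plaintext for local testing.
	conn, err := grpc.Dial("localhost:4000", grpc.WithInsecure())
	if err != nil {
		log.Fatalf("could not connect: %v", err)
	}
	defer conn.Close()

	client := pb.NewUserSearchServiceClient(conn)

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// getUserWithUsername(UserNameRequest) -> User, as listed in the table above.
	user, err := client.GetUserWithUsername(ctx, &pb.UserNameRequest{UserName: "codeuniversity"})
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	log.Printf("found user: %v", user)
}
```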

cli/main/main.go (+1 -1)

@@ -13,7 +13,7 @@ import (
 func main() {
 	kafkaAddress := utils.GetStringFromEnvWithDefault("KAFKA_ADDRESS", "my-kafka:9092")
 	instagramTopic := utils.GetStringFromEnvWithDefault("KAFKA_INSTAGRAM_TOPIC", "user_names")
-	twitterTopic := utils.GetStringFromEnvWithDefault("KAFKA_TWITTER_TOPIC", "twitter-user_names")
+	twitterTopic := utils.GetStringFromEnvWithDefault("KAFKA_TWITTER_TOPIC", "twitter.scraped.user_names")
 
 	if len(os.Args) < 3 {
 		panic("Invalid argumemts. Usage: cli <instagram|twitter> <username>")

db/README.md (+47 -2)

@@ -1,4 +1,49 @@
 # postgres database
 
-## schema
-![db_schema](../docs/db_schema.png)
+We are using [POSTGRESQL](https://www.postgresql.org/) as the store for the raw scraped data from the various data sources. <br>
+The schemas are quite similar to the scraped data structures.
+
+**Table of Contents**
+
+- [Instagram](#instagram)
+- [Remarks](#remarks)
+- [Twitter](#twitter)
+- [Debezium](#debezium)
+
+## [Instagram](https://www.instagram.com/)
+
+This database is the more sophisticated one and is running in production.
+
+![insta_schema](../docs/insta_schema.png)
+
+### Remarks
+
+- `internal_picture_url` is pointing to the downloaded picture on S3
+
+## Twitter
+
+This database is not in production yet and at the moment only dumps the tweaked scraped data.
+
+![twitter_schema](../docs/twitter_schema.png)
+
+## Debezium
+
+The [debezium](https://github.com/debezium/debezium) connector generates a change stream from all change events in postgres (`read`, `create`, `update`, `delete`) and writes them into a kafka-topic `"postgres.public.<table_name>"`
+
+To read from this stream you can:
+
+- get [`kafkacat`](https://github.com/edenhill/kafkacat)
+- inspect the topic list in kafka:
+```bash
+$ kafkacat -L -b my-kafka | grep 'topic "postgres'
+```
+- consume a topic with
+```bash
+$ kafkacat -b my-kafka -t <topic_name>
+```
+
+The messages are quite verbose, since they include their own schema description. The most interesting part is the `value.payload`:
+
+```bash
+$ kafkacat -b my-kafka -topic postgres.public.users | jq '.value | fromjson | .payload'`
+```
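
The same `value.payload` extraction described in the diff above can also be done programmatically. Below is a minimal Go sketch of a consumer that reads one Debezium topic and prints only the payload of each change event; the `segmentio/kafka-go` library and the consumer group name are assumptions for illustration, not necessarily what this repo uses.

```go
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"

	"github.com/segmentio/kafka-go"
)

// debeziumEnvelope models the relevant part of a Debezium change message:
// the schema description is ignored and only the payload is kept.
type debeziumEnvelope struct {
	Payload json.RawMessage `json:"payload"`
}

func main() {
	r := kafka.NewReader(kafka.ReaderConfig{
		Brokers: []string{"my-kafka:9092"},
		Topic:   "postgres.public.users",
		GroupID: "change-stream-inspector", // hypothetical consumer group
	})
	defer r.Close()

	for {
		msg, err := r.ReadMessage(context.Background())
		if err != nil {
			log.Fatal(err)
		}
		var env debeziumEnvelope
		if err := json.Unmarshal(msg.Value, &env); err != nil {
			log.Printf("skipping malformed message: %v", err)
			continue
		}
		// The payload holds the actual change event (before/after, op, source, ...).
		fmt.Println(string(env.Payload))
	}
}
```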
