Skip to content

Commit

Permalink
Merge #249: fix: Wrong infohash when info dict contains source field
Browse files Browse the repository at this point in the history
8fe0955 fix: clippy warning (Jose Celano)
bc04231 doc: [#242] improve infohash generation documentaion (Jose Celano)
6fc4050 test: [#242] add tests for infohash calculation (Jose Celano)
c3e61ea fix: [#242] wrong infohash when info dict contains source field (Jose Celano)

Pull request description:

  When you define a "source" field value in the "info" dictionary inside the torrent file, that field changes the torrent infohash value. We did not save that field in the database and in the in-memory struct `TorrentInfo`, so the calculated infohash was wrong because this field belongs to the `info` key.

  This PR adds the missing field.

  ### TODO

  - [x] Test with MySQL
  - [x] Update the documentation about how infohashes are calculated

  ### Improvements

  - [x] Add tests for the infohash calculation with different examples of `TorrentInfo` values.
  - [ ] The `TorrentInfo` struct should be explicit about the two types of torrents: single file or multiple file. See:
  https://wiki.theory.org/BitTorrentSpecification#Info_in_Single_File_Mode

  ### Other considerations

  I have not found any BEP describing this source key in the info dictionary. There could be more non-official fields. Maybe instead of adding a hardcoded field in the `torrust_torrent` table, we should add a `json` value or something that allows us to save all the extra non-official fields, but I have not seen any other. What do you think @da2ce7?

  ### Extra

  Info dict key with a single file:

  ```json
  {
        "length": 172204,
        "name": "mandelbrot_2048x2048.png",
        "piece length": 16384,
        "pieces": "<hex>7D 91 71 0D 9D 4D BA 88 9B 54 20 54 D5 26 72 8D 5A 86 3F E1 21 DF 77 C7 F7 BB 6C 77 96 21 66 25 38 C5 D9 CD AB 8B 08 EF 8C 24 9B B2 F5 C4 CD 2A DF 0B C0 0C F0 AD DF 72 90 E5 B6 41 4C 23 6C 47 9B 8E 9F 46 AA 0C 0D 8E D1 97 FF EE 68 8B 5F 34 A3 87 D7 71 C5 A6 F9 8E 2E A6 31 7C BD F0 F9 E2 23 F9 CC 80 AF 54 00 04 F9 85 69 1C 77 89 C1 76 4E D6 AA BF 61 A6 C2 80 99 AB B6 5F 60 2F 40 A8 25 BE 32 A3 3D 9D 07 0C 79 68 98 D4 9D 63 49 AF 20 58 66 26 6F 98 6B 6D 32 34 CD 7D 08 15 5E 1A D0 00 09 57 AB 30 3B 20 60 C1 DC 12 87 D6 F3 E7 45 4F 70 67 09 36 31 55 F2 20 F6 6C A5 15 6F 2C 89 95 69 16 53 81 7D 31 F1 B6 BD 37 42 CC 11 0B B2 FC 2B 49 A5 85 B6 FC 76 74 44 93</hex>",
        "private": 1,
        "source": "Source"
     }
  ```

  Info dict key with multiple files:

  ```json
  {
        "files": [
           {
              "length": 37,
              "path": [
                 "file-425ef9ca-014d-403f-8f15-3ece9e3fad38.txt"
              ]
           },
           {
              "length": 37,
              "path": [
                 "file-deeafa50-2e25-4f38-bec0-92865ca6bb1d.txt"
              ]
           }
        ],
        "name": "dir-839a49f0-cabc-4efb-ad5f-b6fb15f8a467",
        "piece length": 16384,
        "pieces": "<hex>8E 47 18 74 52 8E 68 92 DD 86 66 F0 0B DD AD B1 08 DC 69 44</hex>",
        "private": 1,
        "source": "Source"
     }
  ```

ACKs for top commit:
  josecelano:
    ACK 8fe0955

Tree-SHA512: edbe69114ad971ce66161090243dca10ab43505c0eccc71caa9630b27e14d11df37d17e671c8767b9f95d01109ab486c18fbb7996b04744c8ea17fa4406a6d2a
  • Loading branch information
josecelano committed Aug 7, 2023
2 parents 27354a4 + 8fe0955 commit 5465e0c
Show file tree
Hide file tree
Showing 8 changed files with 317 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE torrust_torrents ADD COLUMN source TEXT DEFAULT NULL
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE torrust_torrents ADD COLUMN source TEXT DEFAULT NULL
3 changes: 2 additions & 1 deletion src/databases/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ impl Database for Mysql {
let private = torrent.info.private.unwrap_or(0);

// add torrent
let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, UTC_TIMESTAMP())")
let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, UTC_TIMESTAMP())")
.bind(uploader_id)
.bind(category_id)
.bind(info_hash.to_lowercase())
Expand All @@ -451,6 +451,7 @@ impl Database for Mysql {
.bind(torrent.info.piece_length)
.bind(private)
.bind(root_hash)
.bind(torrent.info.source.clone())
.execute(&self.pool)
.await
.map(|v| i64::try_from(v.last_insert_id()).expect("last ID is larger than i64"))
Expand Down
3 changes: 2 additions & 1 deletion src/databases/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ impl Database for Sqlite {
let private = torrent.info.private.unwrap_or(0);

// add torrent
let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%S',DATETIME('now', 'utc')))")
let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%S',DATETIME('now', 'utc')))")
.bind(uploader_id)
.bind(category_id)
.bind(info_hash.to_lowercase())
Expand All @@ -441,6 +441,7 @@ impl Database for Sqlite {
.bind(torrent.info.piece_length)
.bind(private)
.bind(root_hash)
.bind(torrent.info.source.clone())
.execute(&self.pool)
.await
.map(|v| v.last_insert_rowid())
Expand Down
68 changes: 68 additions & 0 deletions src/models/info_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,75 @@
//!
//! You can hash that byte string with <https://www.pelock.com/products/hash-calculator>
//!
//! > NOTICE: you need to remove the line breaks from the byte string before hashing.
//!
//! ```text
//! 64363a6c656e6774686931373232303465343a6e616d6532343a6d616e64656c62726f745f3230343878323034382e706e6731323a7069656365206c656e67746869313633383465363a7069656365733232303a7d91710d9d4dba889b542054d526728d5a863fe121df77c7f7bb6c779621662538c5d9cdab8b08ef8c249bb2f5c4cd2adf0bc00cf0addf7290e5b6414c236c479b8e9f46aa0c0d8ed197ffee688b5f34a387d771c5a6f98e2ea6317cbdf0f9e223f9cc80af540004f985691c7789c1764ed6aabf61a6c28099abb65f602f40a825be32a33d9d070c796898d49d6349af205866266f986b6d3234cd7d08155e1ad0000957ab303b2060c1dc1287d6f3e7454f706709363155f220f66ca5156f2c8995691653817d31f1b6bd3742cc110bb2fc2b49a585b6fc7674449365
//! ```
//!
//! The result is a 20-byte hash, shown here as a 40-char hex string: `5452869BE36F9F3350CCEE6B4544E7E76CAAADAB`
//!
//! The `info` dictionary can contain more fields, like the following example:
//!
//! ```json
//! {
//! "length": 172204,
//! "name": "mandelbrot_2048x2048.png",
//! "piece length": 16384,
//! "pieces": "<hex>7D 91 71 0D 9D 4D BA 88 9B 54 20 54 D5 26 72 8D 5A 86 3F E1 21 DF 77 C7 F7 BB 6C 77 96 21 66 25 38 C5 D9 CD AB 8B 08 EF 8C 24 9B B2 F5 C4 CD 2A DF 0B C0 0C F0 AD DF 72 90 E5 B6 41 4C 23 6C 47 9B 8E 9F 46 AA 0C 0D 8E D1 97 FF EE 68 8B 5F 34 A3 87 D7 71 C5 A6 F9 8E 2E A6 31 7C BD F0 F9 E2 23 F9 CC 80 AF 54 00 04 F9 85 69 1C 77 89 C1 76 4E D6 AA BF 61 A6 C2 80 99 AB B6 5F 60 2F 40 A8 25 BE 32 A3 3D 9D 07 0C 79 68 98 D4 9D 63 49 AF 20 58 66 26 6F 98 6B 6D 32 34 CD 7D 08 15 5E 1A D0 00 09 57 AB 30 3B 20 60 C1 DC 12 87 D6 F3 E7 45 4F 70 67 09 36 31 55 F2 20 F6 6C A5 15 6F 2C 89 95 69 16 53 81 7D 31 F1 B6 BD 37 42 CC 11 0B B2 FC 2B 49 A5 85 B6 FC 76 74 44 93</hex>",
//! "private": 1,
//! "md5sum": "e2ea6317cbdf0f9e223f9cc80af54000",
//! "source": "GGn"
//! }
//! ```
//!
//! Refer to the struct [`TorrentInfo`](crate::models::torrent_file::TorrentInfo) for more info.
//!
//! Regarding the `source` field, it is not clear what the initial intention
//! for that field was. It could be a string to identify the source of the torrent.
//! But it has been used by private trackers to identify the tracker that
//! created the torrent and it's usually a three-char string. Refer to
//! <https://github.com/qbittorrent/qBittorrent/discussions/19406> for more info.
//!
//! The `md5sum` field is a string with the MD5 hash of the file. It seems it
//! is not used by the protocol.
//!
//! Some fields are exclusive to `BitTorrent` v2.
//!
//! For the [`BitTorrent` Version 1 specification](https://www.bittorrent.org/beps/bep_0003.html) there are two types of torrent
//! files: single file and multiple files. Some fields are only valid for one
//! type of torrent file.
//!
//! An example for a single-file torrent info dictionary:
//!
//! ```json
//! {
//! "length": 11,
//! "name": "sample.txt",
//! "piece length": 16384,
//! "pieces": "<hex>D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A</hex>"
//! }
//! ```
//!
//! An example for a multi-file torrent info dictionary:
//!
//! ```json
//! {
//! "files": [
//! {
//! "length": 11,
//! "path": [
//! "sample.txt"
//! ]
//! }
//! ],
//! "name": "sample",
//! "piece length": 16384,
//! "pieces": "<hex>D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A</hex>"
//! }
//! ```
//!
//! An example torrent creator implementation can be found [here](https://www.bittorrent.org/beps/bep_0052_torrent_creator.py).
use std::panic::Location;

use thiserror::Error;
Expand Down
231 changes: 231 additions & 0 deletions src/models/torrent_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ pub struct TorrentInfo {
#[serde(default)]
#[serde(rename = "root hash")]
pub root_hash: Option<String>,
#[serde(default)]
pub source: Option<String>,
}

impl TorrentInfo {
Expand Down Expand Up @@ -123,6 +125,7 @@ impl Torrent {
private,
path: None,
root_hash: None,
source: None,
};

// a torrent file has a root hash or a pieces key, but not both.
Expand Down Expand Up @@ -295,3 +298,231 @@ pub struct DbTorrentInfo {
/// Holds a single tracker announce URL.
///
/// NOTE(review): the `Db` prefix suggests this mirrors a database row —
/// confirm against the queries that build it.
pub struct DbTorrentAnnounceUrl {
/// The tracker URL, stored as a plain string.
pub tracker_url: String,
}

#[cfg(test)]
mod tests {

mod info_hash_calculation_for_version_v1 {

use serde_bytes::ByteBuf;

use crate::models::torrent_file::{Torrent, TorrentInfo};

#[test]
fn the_parsed_torrent_file_should_calculated_the_torrent_info_hash() {
    // Checks that `Torrent::info_hash()` yields the expected v1 infohash for
    // the single-file `sample.txt` torrent described below.

    /* The sample.txt content (`mandelbrot`):
       ```
       6d616e64656c62726f740a
       ```
       The sample.txt.torrent content:
       ```
       6431303a6372656174656420627931383a71426974746f7272656e742076
       342e352e3431333a6372656174696f6e2064617465693136393131343935
       373265343a696e666f64363a6c656e67746869313165343a6e616d653130
       3a73616d706c652e74787431323a7069656365206c656e67746869313633
       383465363a70696563657332303ad491587f1c42dff0cb0ff5c2b8cefe22
       b3ad310a6565
       ```
       ```json
       {
         "created by": "qBittorrent v4.5.4",
         "creation date": 1691149572,
         "info": {
           "length": 11,
           "name": "sample.txt",
           "piece length": 16384,
           "pieces": "<hex>D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A</hex>"
         }
       }
       ```
    */

    // The `length` key of the info dictionary is derived from this content (11 bytes).
    let sample_data_in_txt_file = "mandelbrot\n";

    let info = TorrentInfo {
        name: "sample.txt".to_string(),
        pieces: Some(ByteBuf::from(vec![
            // D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A // hex
            212, 145, 88, 127, 28, 66, 223, 240, 203, 15, 245, 194, 184, 206, 254, 34, 179, 173, 49, 10, // dec
        ])),
        piece_length: 16384,
        md5sum: None,
        length: Some(sample_data_in_txt_file.len().try_into().unwrap()),
        files: None,
        private: None,
        path: None,
        root_hash: None,
        source: None,
    };

    // `info` is not used again, so it is moved into the torrent instead of cloned.
    let torrent = Torrent {
        info,
        announce: None,
        announce_list: Some(vec![]),
        creation_date: None,
        comment: None,
        created_by: None,
        nodes: None,
        encoding: None,
        httpseeds: None,
    };

    assert_eq!(torrent.info_hash(), "79fa9e4a2927804fe4feab488a76c8c2d3d1cdca");
}

mod infohash_should_be_calculated_for {

use serde_bytes::ByteBuf;

use crate::models::torrent_file::{Torrent, TorrentFile, TorrentInfo};

#[test]
fn a_simple_single_file_torrent() {
    // Single-file torrent with only the basic keys set:
    // `length`, `name`, `piece length` and `pieces`.

    // The `length` key of the info dictionary is derived from this content.
    let sample_data_in_txt_file = "mandelbrot\n";

    let info = TorrentInfo {
        name: "sample.txt".to_string(),
        pieces: Some(ByteBuf::from(vec![
            // D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A // hex
            212, 145, 88, 127, 28, 66, 223, 240, 203, 15, 245, 194, 184, 206, 254, 34, 179, 173, 49, 10, // dec
        ])),
        piece_length: 16384,
        md5sum: None,
        length: Some(sample_data_in_txt_file.len().try_into().unwrap()),
        files: None,
        private: None,
        path: None,
        root_hash: None,
        source: None,
    };

    // `info` is not used again, so it is moved into the torrent instead of cloned.
    let torrent = Torrent {
        info,
        announce: None,
        announce_list: Some(vec![]),
        creation_date: None,
        comment: None,
        created_by: None,
        nodes: None,
        encoding: None,
        httpseeds: None,
    };

    assert_eq!(torrent.info_hash(), "79fa9e4a2927804fe4feab488a76c8c2d3d1cdca");
}

#[test]
fn a_simple_multi_file_torrent() {
    // Multi-file torrent: the top-level `length` is absent and the single
    // file is listed under `files` instead.

    // The per-file `length` is derived from this content.
    let sample_data_in_txt_file = "mandelbrot\n";

    let info = TorrentInfo {
        name: "sample".to_string(),
        pieces: Some(ByteBuf::from(vec![
            // D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A // hex
            212, 145, 88, 127, 28, 66, 223, 240, 203, 15, 245, 194, 184, 206, 254, 34, 179, 173, 49, 10, // dec
        ])),
        piece_length: 16384,
        md5sum: None,
        length: None,
        files: Some(vec![TorrentFile {
            path: vec!["sample.txt".to_string()],
            length: sample_data_in_txt_file.len().try_into().unwrap(),
            md5sum: None,
        }]),
        private: None,
        path: None,
        root_hash: None,
        source: None,
    };

    // `info` is not used again, so it is moved into the torrent instead of cloned.
    let torrent = Torrent {
        info,
        announce: None,
        announce_list: Some(vec![]),
        creation_date: None,
        comment: None,
        created_by: None,
        nodes: None,
        encoding: None,
        httpseeds: None,
    };

    assert_eq!(torrent.info_hash(), "aa2aca91ab650c4d249c475ca3fa604f2ccb0d2a");
}

#[test]
fn a_simple_single_file_torrent_with_a_source() {
    // Single-file torrent whose info dictionary also carries a `source` value;
    // the expected infohash differs from the one without `source`.

    // The `length` key of the info dictionary is derived from this content.
    let sample_data_in_txt_file = "mandelbrot\n";

    let info = TorrentInfo {
        name: "sample.txt".to_string(),
        pieces: Some(ByteBuf::from(vec![
            // D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A // hex
            212, 145, 88, 127, 28, 66, 223, 240, 203, 15, 245, 194, 184, 206, 254, 34, 179, 173, 49, 10, // dec
        ])),
        piece_length: 16384,
        md5sum: None,
        length: Some(sample_data_in_txt_file.len().try_into().unwrap()),
        files: None,
        private: None,
        path: None,
        root_hash: None,
        source: Some("ABC".to_string()), // The tracker three-letter code
    };

    // `info` is not used again, so it is moved into the torrent instead of cloned.
    let torrent = Torrent {
        info,
        announce: None,
        announce_list: Some(vec![]),
        creation_date: None,
        comment: None,
        created_by: None,
        nodes: None,
        encoding: None,
        httpseeds: None,
    };

    assert_eq!(torrent.info_hash(), "ccc1cf4feb59f3fa85c96c9be1ebbafcfe8a9cc8");
}

#[test]
fn a_simple_single_file_private_torrent() {
    // Single-file torrent with `private` set to 1; the expected infohash
    // differs from the one without the `private` key.

    // The `length` key of the info dictionary is derived from this content.
    let sample_data_in_txt_file = "mandelbrot\n";

    let info = TorrentInfo {
        name: "sample.txt".to_string(),
        pieces: Some(ByteBuf::from(vec![
            // D4 91 58 7F 1C 42 DF F0 CB 0F F5 C2 B8 CE FE 22 B3 AD 31 0A // hex
            212, 145, 88, 127, 28, 66, 223, 240, 203, 15, 245, 194, 184, 206, 254, 34, 179, 173, 49, 10, // dec
        ])),
        piece_length: 16384,
        md5sum: None,
        length: Some(sample_data_in_txt_file.len().try_into().unwrap()),
        files: None,
        private: Some(1),
        path: None,
        root_hash: None,
        source: None,
    };

    // `info` is not used again, so it is moved into the torrent instead of cloned.
    let torrent = Torrent {
        info,
        announce: None,
        announce_list: Some(vec![]),
        creation_date: None,
        comment: None,
        created_by: None,
        nodes: None,
        encoding: None,
        httpseeds: None,
    };

    assert_eq!(torrent.info_hash(), "d3a558d0a19aaa23ba6f9f430f40924d10fefa86");
}
}
}
}
1 change: 1 addition & 0 deletions src/services/torrent_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ mod tests {
private: Some(0),
path: None,
root_hash: None,
source: None,
},
announce: None,
announce_list: Some(vec![]),
Expand Down
Loading

0 comments on commit 5465e0c

Please sign in to comment.