Skip to content

Commit

Permalink
add ipv6, fix ip range generator, misc
Browse files Browse the repository at this point in the history
  • Loading branch information
klkvsk committed Oct 31, 2023
1 parent 61955e8 commit ad8a6f2
Show file tree
Hide file tree
Showing 20 changed files with 3,654 additions and 1,723 deletions.
18 changes: 13 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ Whoeasy - smart WHOIS client and parser for PHP
====================

Lookup domain names, IP addresses and AS numbers by WHOIS.

Parse answers into structured data.
Use proxies to counter rate limits.

Installation
------------

Install from composer:
Install from composer (until 1.0 prefer dev-master over releases, it's buggy anyway):

```shell
composer install klkvsk/whoeasy
composer require klkvsk/whoeasy=dev-master
```

Usage
Expand All @@ -30,8 +31,12 @@ echo $answer->result->registrar->name;
You can customize the factory by extending `Whois`
or you can utilize `WhoisClient` and `WhoisParser` directly.

Whoeasy is easily extensible. You can add your own client adapters, parsers, server configs, etc.
Whoeasy is easily extensible.
You can add your own client adapters, parsers, server configs, proxy providers, etc.

Built-in client adapters are:
- CurlTelnet - default if ext-curl is installed. Supports any proxies curl does.
- Socket - fallback, uses `stream_socket_client`. Supports only HTTP(s)-tunnel proxies.

Whois-servers registry
-----
Expand All @@ -40,10 +45,13 @@ The list of servers is automatically generated from https://github.com/rfc1036/w
a default `whois` tool in most Linux distributions. This is the most up-to-date source
of correct whois servers per tld.

See [BuiltinClientRegistry](./src/Client/Registry/BuiltinRegistryRegistry.php)
for compiled list. See [generator](./generator) for source lists and build script.


ToDos
-----
* Querying NIC handles, IP and ASN
* Querying ASN
* Using RDAP as an alternative adapter
* Replace Novutec parsing templates with own

Expand Down
3 changes: 3 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
"ext-curl": "*"
},
"autoload": {
"files": [
"src/functions.php"
],
"psr-4": {
"Klkvsk\\Whoeasy\\": "src/"
}
Expand Down
19 changes: 19 additions & 0 deletions generator/as32_del_list
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# http://www.iana.org/assignments/as-numbers

# actually I listed here also the unallocated space reserved for each RIR

131077 131086 whois.nic.ad.jp
131092 131101 whois.nic.or.kr
131152 131161 whois.nic.ad.jp
131791 131890 whois.nic.or.kr
131893 131992 whois.nic.ad.jp

2.0 2.65535 apnic
3.0 3.65535 ripe
4.0 4.65535 lacnic
5.0 5.65535 afrinic
6.0 6.65535 arin

# private ASN block
4200000000 4294967294 ripe

97 changes: 97 additions & 0 deletions generator/as_del_list
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# http://www.iana.org/assignments/as-numbers

248 251 ripe
306 371 whois.nic.mil
379 508 whois.nic.mil
1101 1200 ripe
1267 1275 ripe
1877 1901 ripe
2043 2043 ripe
2047 2047 ripe
2057 2136 ripe
2387 2488 ripe
2497 2528 whois.nic.ad.jp
2585 2614 ripe
2773 2822 ripe
2830 2879 ripe
3154 3353 ripe
4608 4864 apnic
5120 5376 whois.nic.mil
5377 5631 ripe
5800 6055 whois.nic.mil
6656 6911 ripe
7467 7722 apnic
8192 9215 ripe
9591 9622 whois.nic.ad.jp
9628 9647 whois.nic.or.kr
9683 9712 whois.nic.or.kr
9753 9784 whois.nic.or.kr
9840 9871 whois.nic.or.kr
9943 9982 whois.nic.or.kr
9990 10021 whois.nic.ad.jp
10034 10073 whois.nic.or.kr
10154 10198 whois.nic.or.kr
9216 10239 apnic
12288 13311 ripe
15360 16383 ripe
17503 17534 whois.nic.ad.jp
17567 17616 whois.nic.or.kr
17673 17704 whois.nic.ad.jp
17832 17881 whois.nic.or.kr
17930 17961 whois.nic.ad.jp
18067 18098 whois.nic.ad.jp
18121 18152 whois.nic.ad.jp
18259 18290 whois.nic.ad.jp
18294 18343 whois.nic.or.kr
17408 18431 apnic
20480 21503 ripe
23552 23601 whois.nic.or.kr
23612 23643 whois.nic.ad.jp
23773 23836 whois.nic.ad.jp
24248 24297 whois.nic.ad.jp
23552 24575 apnic
24576 25599 ripe
26592 26623 lacnic
27648 28671 lacnic
28672 29695 ripe
30980 30999 afrinic
30720 31743 ripe
34515 34519 afrinic
33792 35839 ripe
36864 37887 afrinic
37888 37927 whois.nic.ad.jp
38086 38135 whois.nic.or.kr
38387 38436 whois.nic.or.kr
38627 38656 whois.nic.ad.jp
38660 38709 whois.nic.or.kr
37888 38911 apnic
38912 39935 ripe
40960 45055 ripe
45360 45409 whois.nic.or.kr
45672 45691 whois.nic.ad.jp
45963 46012 whois.nic.or.kr
45056 46079 apnic
47104 52223 ripe
52224 53247 lacnic
55372 55396 whois.nic.ad.jp
55584 55633 whois.nic.or.kr
55888 55912 whois.nic.ad.jp
55296 56319 apnic
56320 58367 ripe
58645 58654 whois.nic.ad.jp
58784 58793 whois.nic.ad.jp
59091 59130 whois.nic.ad.jp
58368 59391 apnic
59392 61439 ripe
61440 61951 lacnic
61952 62463 ripe
63488 64098 apnic
64099 64197 lacnic
# catch all: everything else comes from ARIN
1 64296 arin
64297 64395 apnic
64396 64495 ripe

# documentation and private ASN block
64496 65534 ripe

120 changes: 117 additions & 3 deletions generator/build.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
<?php

use function Klkvsk\Whoeasy\ip6prefix2long;

require __DIR__ . '/../src/functions.php';

// Promote every PHP error (warning, notice, deprecation, ...) to an ErrorException.
// The handler must actually *throw*: merely returning the exception object counts
// as "handled" (any non-false return value bypasses PHP's standard handler), which
// would silently discard the error instead of surfacing it.
set_error_handler(
    static fn($s, $m, $f = null, $l = null) => throw new ErrorException($m, 0, $s, $f, $l),
    E_ALL
);

new class {
protected array $servers = [];
protected array $toplevelRefs = [];
Expand All @@ -16,6 +22,11 @@ public function __construct()
'tld_serv_list' => $this->parseTldServ(...),
'servers_charset_list' => $this->parseServerCharset(...),
'nic_handles_list' => $this->parseNicHandle(...),
'ip_del_recovered.h' => $this->parseIpv4Recovered(...),
'ip_del_list' => $this->parseIpv4(...),
'ip6_del_list' => $this->parseIpv6(...),
// 'as_del_list' => $this->parseAsn(...),
// 'as32_del_list' => $this->parseAsn(...),
];

foreach ($files as $file => $importFn) {
Expand All @@ -42,8 +53,8 @@ protected function parseNewGTld(string $line): void
$this->servers,
[
$serverName => [
'uri' => "whois://$serverName",
'tlds' => [ ".$tld" ],
'uri' => "whois://$serverName",
'tlds' => [ ".$tld" ],
],
]
);
Expand Down Expand Up @@ -169,6 +180,84 @@ private function parseNicHandle($line): void
);
}


/**
 * Imports one line of the form "<ipv4>/<bits> <server>" into the IPv4 range list.
 *
 * Lines whose server column is exactly "UNKNOWN" are skipped. A server name
 * without a dot is treated as a short alias and expanded to "whois.<name>.net".
 * On success a [ network address, netmask, server ] triple (address and mask as
 * integers) is appended to $this->ipv4Ranges.
 *
 * @throws UnexpectedValueException when the line does not match the expected
 *                                  format, the address is not valid IPv4, or the
 *                                  prefix length is outside 0..32
 */
protected function parseIpv4(string $line): void
{
    $matched = preg_match("/^([0-9.]+)\/([0-9]{1,2})\s+([a-z0-9.-]+)/i", $line, $parts);
    if (!$matched) {
        throw new UnexpectedValueException("'$line'");
    }

    $ip = $parts[1];
    $prefixLength = $parts[2];
    $server = $parts[3];

    if ($server === "UNKNOWN") {
        return;
    }

    // short aliases (no dot) stand for the whois.<alias>.net host
    $server = strtolower($server);
    $server = str_contains($server, '.') ? $server : "whois.$server.net";

    $address = ip2long($ip);
    if ($address === false) {
        throw new UnexpectedValueException("bad ip $ip: '$line'");
    }
    if ($prefixLength < 0 || $prefixLength > 32) {
        throw new UnexpectedValueException("bad subnet: '$line'");
    }

    // build the netmask: shift in zero host bits, then clamp to 32 bits
    $hostBits = 32 - (int)$prefixLength;
    $netmask = (~0 << $hostBits) & 0xFFFFFFFF;

    $this->ipv4Ranges[] = [ $address, $netmask, $server ];
}

/**
 * Imports one line of the form "<hhhh:hhhh>::/<bits> <server>" into the IPv6 range list.
 *
 * Only the first two 16-bit groups of the address are accepted and the prefix
 * length is limited to 32, so ranges are described by a 32-bit prefix and a
 * 32-bit mask. Lines whose server column is "UNKNOWN", "teredo" or "6to4" are
 * skipped. A server name without a dot is expanded to "whois.<name>.net".
 * On success a [ prefix, mask, server ] triple is appended to $this->ipv6Ranges.
 *
 * @throws UnexpectedValueException when the line does not match the expected
 *                                  format, the prefix cannot be converted, or
 *                                  the prefix length is outside 0..32
 */
protected function parseIpv6(string $line): void
{
    $matched = preg_match('/^([a-f0-9]{4}:[a-f0-9]{4})::\/([0-9]{1,2})\s*(\S+)/i', $line, $parts);
    if (!$matched) {
        throw new UnexpectedValueException("'$line'");
    }

    $ip = $parts[1];
    $prefixLength = $parts[2];
    $server = $parts[3];

    // "UNKNOWN" has no server; "teredo" and "6to4" are special ranges - to do
    if (in_array($server, [ "UNKNOWN", "teredo", '6to4' ], true)) {
        return;
    }

    // short aliases (no dot) stand for the whois.<alias>.net host
    $server = strtolower($server);
    $server = str_contains($server, '.') ? $server : "whois.$server.net";

    // NOTE(review): ip6prefix2long is defined in src/functions.php (not visible here);
    // presumably it returns the two groups as one 32-bit integer, or false on failure.
    $prefix = ip6prefix2long($ip);
    if ($prefix === false) {
        throw new UnexpectedValueException("bad ip $ip: '$line'");
    }
    if ($prefixLength < 0 || $prefixLength > 32) {
        throw new UnexpectedValueException("bad subnet: '$line'");
    }

    // build the mask: shift in zero host bits, then clamp to 32 bits
    $hostBits = 32 - (int)$prefixLength;
    $mask = (~0 << $hostBits) & 0xFFFFFFFF;

    $this->ipv6Ranges[] = [ $prefix, $mask, $server ];
}


/**
 * Imports one C-initializer style line, `{ <ip>UL, <mask>UL, "<server>"`, into
 * the IPv4 range list.
 *
 * The address and mask are given as decimal integers (optionally suffixed with
 * U and/or L) and are stored as-is; the server name is taken verbatim.
 * On success an [ ip, mask, server ] triple is appended to $this->ipv4Ranges.
 *
 * @throws UnexpectedValueException when the line does not match the expected
 *                                  format or a number does not fit in 32 bits
 */
protected function parseIpv4Recovered(string $line): void
{
    $matched = preg_match("/^\{\s*(\d+)U?L?\s*,\s*(\d+)U?L?\s*,\s*\"([a-z0-9.-]+)\"/i", $line, $parts);
    if (!$matched) {
        throw new UnexpectedValueException("'$line'");
    }

    $address = (int)$parts[1];
    $netmask = (int)$parts[2];
    $server = $parts[3];

    // both numbers must be representable as unsigned 32-bit values
    $limit = 2**32;
    foreach ([ $address, $netmask ] as $value) {
        if ($value < 0 || $value >= $limit) {
            throw new UnexpectedValueException("bad range: '$line'");
        }
    }

    $this->ipv4Ranges[] = [ $address, $netmask, $server ];
}

private static function merge(array $a, array $b, bool $allowOverwrite = true): array
{
foreach ($b as $key => $new) {
Expand Down Expand Up @@ -327,6 +416,7 @@ protected function dumpList(array $list, string $indent, string $eol, bool $mult
return $out;
}


protected function importWhoisListFile(string $file, callable $importFn): void
{
$data = file_get_contents($file);
Expand All @@ -339,6 +429,12 @@ protected function importWhoisListFile(string $file, callable $importFn): void
if (str_starts_with($line, '#')) {
continue;
}
if (str_starts_with($line, '/*')) {
continue;
}
if (str_starts_with($line, '//')) {
continue;
}
if (empty($line)) {
continue;
}
Expand Down Expand Up @@ -381,7 +477,7 @@ protected function importNovutecIniFile(string $file): void
continue;
}
// sort from longest to shortest for iterative matching
uasort($templates, fn ($a, $b) => strlen($b) <=> strlen($a));
uasort($templates, fn($a, $b) => strlen($b) <=> strlen($a));

// add fallback to first occurred template
if (!isset($templates['*'])) {
Expand Down Expand Up @@ -426,7 +522,25 @@ protected function generateCode(string $file): void
break;

case 'ipv4':
foreach ($this->ipv4Ranges as $range) {
[ $ipMin, $mask ] = $range;
$ipMax = $ipMin | ($mask ^ (2**32 - 1));
$modifiedFile[] = $indent . '// ' . long2ip($ipMin) . ' - ' . long2ip($ipMax) . $eol;
$modifiedFile[] = $indent . $this->dumpList($range, $indent, $eol) . ',' . $eol;
}
break;

case 'ipv6':
foreach ($this->ipv6Ranges as $range) {
[ $ipMin, $mask ] = $range;
$ipMax = $ipMin | ($mask ^ (2**32 - 1));
$rangeFirst = sprintf('%04X:%04X', $ipMin >> 16, $ipMin & (2**16 - 1));
$rangeLast = sprintf('%04X:%04X', $ipMax >> 16, $ipMax & (2**16 - 1));
$modifiedFile[] = $indent . '// ' . $rangeFirst . ' - ' . $rangeLast . $eol;
$modifiedFile[] = $indent . $this->dumpList($range, $indent, $eol) . ',' . $eol;
}
break;

case 'asn':
break;

Expand Down
Loading

0 comments on commit ad8a6f2

Please sign in to comment.