# Copyright (c) 2017 Catalyst.net Ltd
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Author: Michael Fincham <michael.fincham@catalyst.net.nz>
# Author: Filip Vujicic <filip.vujicic@catalyst.net.nz>
# Author: Sam Banks <sam.banks@catalyst.net.nz>

import bz2
import json
import logging
import lzma
import os
import time
from datetime import datetime

from bs4 import BeautifulSoup

from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction, IntegrityError

from dateutil.parser import parse as dateutil_parse
import deb822

import pytz
import requests
import svn.remote

from advisories.models import Advisory, SourcePackage, BinaryPackage, Vulnerability

logging.basicConfig(
    format='%(asctime)s | %(levelname)s: %(message)s',
    level=logging.DEBUG if settings.DEBUG else logging.ERROR,
)


class DebianFeed(object):
    """
    Syncs additions to the official DSA list in to the local database, as well as retrieving and parsing metadata about each one.
    """

    def __init__(self, secure_testing_url=None, cache_location=None, releases=None, architectures=None, snapshot_url=None, security_apt_url=None, list_location=None):
        self.secure_testing_url = secure_testing_url or "svn://anonscm.debian.org/svn/secure-testing"
        self.client = svn.remote.RemoteClient(self.secure_testing_url)
        self.cache_location = cache_location or "%s/advisory_cache/dsa" % settings.BASE_DIR
        self.releases = releases or (
            'wheezy',
            'jessie',
            'stretch',
        )
        self.architectures = architectures or (
            'i386',
            'amd64',
            'all',
        )
        self.snapshot_url = snapshot_url or "http://snapshot.debian.org"
        self.security_apt_url = security_apt_url or "http://security.debian.org/debian-security"
        self.list_location = list_location or "data/DSA/list"

    def _update_svn_repository(self):
        """
        Update the local cache of the DSA list.
        """

        # create the cache directory if it doesn't already exist; other failures
        # (permission denied, disk full, etc.) will propagate
        os.makedirs(self.cache_location, exist_ok=True)

        try:
            advisory_list = self.client.cat(self.list_location).decode('utf-8')
            with open('%s/list' % self.cache_location, 'w') as advisory_list_file:
                advisory_list_file.write(advisory_list)
        except ValueError:
            raise Exception("unable to retrieve data from SVN")
        except Exception:
            raise Exception("unknown error updating DSA/DLA list cache file")

    def _parse_svn_advisories(self):
        """
        Parse the local cache of the DSA/DLA list.
        """

        advisories = {}
        with open('%s/list' % self.cache_location) as advisory_list_file:
            advisory = ''
            packages = {}
            cves = []
            for line in advisory_list_file:

                # minimal state machine follows
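                # each block in the list looks roughly like this (an illustrative sketch
                # of what this parser expects, not an exhaustive grammar):
                #
                #   [DD Mon YYYY] DSA-NNNN-N source-package - short description
                #   {CVE-YYYY-NNNN CVE-YYYY-NNNN ...}
                #   [release] - source-package fixed-version <tags>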
                if line.startswith('['): # start of the DSA/DLA
                    if advisory != '' and len(packages) > 0: # at least one complete DSA/DLA parsed
                        advisories[advisory] = {
                            'packages': packages,
                            'description': description,
                            'issued': issued,
                            'cves': cves,
                        }
                    issued = pytz.utc.localize(dateutil_parse(line.split('] ')[0].strip('[')))
                    advisory = line.split('] ')[-1].split()[0] # upstream ID of DSA/DLA
                    # there are at least two advisories, DLA-359-1 and DLA-73-1, which don't
                    # follow the "[<date>] <DLA #> <source package> - <description>" format,
                    # hence the following code.
                    if ' - ' in line:
                        description = line.split(' - ')[-1].strip()
                    else:
                        description = line.split(' ', 5)[-1].strip()

                    packages = {}
                    cves = [] # reset per advisory so the previous block's CVEs can't leak in
                elif line.startswith('\t['): # source package name for a particular release

                    if '<' in line: # package has some tags
                        tags = [tag.strip('<>') for tag in line.split() if tag.startswith('<') and tag.endswith('>')]
                    else:
                        tags = []

                    if 'not-affected' in tags: # ignore package
                        continue

                    release = line.split()[0].strip("\t[] ")
                    if release not in self.releases: # no point in looking for unsupported releases
                        continue

                    if 'unfixed' in tags or 'end-of-life' in tags:
                        version = '0' # unsafe at any speed
                    else:
                        version = line.split()[3]

                    source_package = line.split()[2]
                    if source_package not in packages:
                        packages[source_package] = {}
                    packages[source_package][release] = version
                elif line.startswith('\t{CVE'): # list of relevant CVEs fixed in this release
                    cves = line.strip().strip('{}').split()

        # flush the final advisory in the file; the loop above only records an
        # advisory when it sees the start of the next one
        if advisory != '' and len(packages) > 0:
            advisories[advisory] = {
                'packages': packages,
                'description': description,
                'issued': issued,
                'cves': cves,
            }

        return advisories

    def update_local_database(self):
        """
        Update the local repository, parse it and add any new advisories to the local database.
        """

        print("  Updating DSA RDF feed... ", end='')
        try:
            dsa_rdf_soup = BeautifulSoup(requests.get('https://www.debian.org/security/dsa-long').content, 'html.parser')
            dsa_descriptions = {
                item.attrs['rdf:about'].split('/')[-1].lower(): BeautifulSoup(item.description.text, 'html.parser').get_text().strip()
                for item in dsa_rdf_soup.find_all('item')
            }
            print("OK")
        except Exception:
            print("could not update DSA RDF feed")
            dsa_descriptions = {}

        print("  Updating security repository data... ", end='')

        release_metadata = {}
        source_packages = {}
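        # source_packages maps (release, source package, source version) to a
        # dictionary of {binary package name: {architecture: binary version}}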

        # grab the release metadata from the repository
        for release_name in self.releases:
            release_metadata[release_name] = deb822.Release(requests.get("%s/dists/%s/updates/Release" % (self.security_apt_url, release_name)).text)


        # grab the binary package metadata for the desired architectures
        # this section attempts to make a reverse mapping for working out what binary packages a particular source package builds
        for release_name, release_metadatum in release_metadata.items():

            # work out which compression format this release offers for its Packages
            # indices by looking for the filename in the Release file listing
            if 'Packages.xz\n' in str(release_metadatum):
                package_filetype = 'xz'
            elif 'Packages.bz2\n' in str(release_metadatum):
                package_filetype = 'bz2'
            else:
                raise Exception("Unknown package type")

            for component in release_metadatum['Components'].split():
                for architecture in [architecture for architecture in release_metadatum['Architectures'].split() if architecture in self.architectures]:

                    # fetch and decompress the Packages index for this component and architecture
                    packages_url = "%s/dists/%s/%s/binary-%s/Packages.%s" % (self.security_apt_url, release_name, component, architecture, package_filetype)
                    if package_filetype == 'xz':
                        packages = deb822.Deb822.iter_paragraphs(lzma.decompress(requests.get(packages_url).content).decode("utf-8"))
                    elif package_filetype == 'bz2':
                        packages = deb822.Deb822.iter_paragraphs(bz2.decompress(requests.get(packages_url).content).decode("utf-8"))
                    else:
                        raise Exception('Unable to extract file')


                    for binary_package in packages:
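                        # the optional Source field is "name" or "name (version)"; when it's
                        # absent the source package shares the binary package's name and version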
                        source_field = binary_package.get('Source', binary_package['Package']).split()
                        source_package_name = source_field[0]

                        try:
                            source_package_version = source_field[1].strip('()')
                        except IndexError:
                            source_package_version = binary_package['Version']

                        source_package_key = (release_name, source_package_name, source_package_version)

                        if source_package_key not in source_packages:
                            source_packages[source_package_key] = {}

                        if binary_package['Package'] not in source_packages[source_package_key]:
                            source_packages[source_package_key][binary_package['Package']] = {}

                        source_packages[source_package_key][binary_package['Package']][architecture] = binary_package['Version']

        print("OK")
        print("  Updating security-tracker data... ", end='')

        self._update_svn_repository()
        svn_advisories = self._parse_svn_advisories()
        print("OK")

        # make a set of the advisory IDs which exist on disk but not in the database
        new_advisories = set(svn_advisories) - {advisory.upstream_id for advisory in Advisory.objects.filter(source='debian')}

        print("  Found %i new DSAs/DLAs to download" % len(new_advisories))

        for advisory in new_advisories:
            print("    Downloading %s... " % advisory, end='')
            search_packages = set()
            description = svn_advisories[advisory]['description']
            if description: # guard against an empty description before capitalising it
                description = description[0].upper() + description[1:]
            base_dsa_name = '-'.join(advisory.lower().split('-')[0:2])
            long_description = dsa_descriptions.get(base_dsa_name, '')

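            # create the advisory, its vulnerabilities and its packages in one transaction,
            # so a failure part-way through (e.g. the IntegrityError below) leaves no orphans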
            with transaction.atomic():
                db_advisory = Advisory(
                    upstream_id=advisory,
                    source="debian",
                    issued=svn_advisories[advisory]['issued'],
                    short_description=description,
                    description=long_description,
                )
                db_advisory.save()
                for cve in svn_advisories[advisory]['cves']:
                    # get_or_create already persists a newly created row, and adding to a
                    # many-to-many relation saves immediately, so no explicit save() is needed
                    db_vulnerability, _ = Vulnerability.objects.get_or_create(upstream_id=cve.upper(), defaults={'first_seen': db_advisory.issued})
                    db_vulnerability.advisories.add(db_advisory)
                for package, versions in svn_advisories[advisory]['packages'].items():
                    for release, version in versions.items():
                        # make the source package object
                        db_srcpackage = SourcePackage(advisory=db_advisory, package=package, release=release, safe_version=version)
                        db_srcpackage.save()
                        search_packages.add(package)
                        search_packages.add(version)


                        # attempt by convoluted means to get the binary packages for that source package
                        try:
                            if (release, package, version) in source_packages: # package is current so in the repo
                                for binary_package_name, binary_package_architectures in source_packages[(release, package, version)].items():
                                    for architecture, binversion in binary_package_architectures.items():
                                        db_binpackage = BinaryPackage(source_package=db_srcpackage, advisory=db_advisory, package=binary_package_name, release=release, safe_version=binversion, architecture=architecture)
                                        db_binpackage.save()
                                        search_packages.add(binary_package_name)
                                        search_packages.add(binversion)
                            else: # package is not latest in the repo, hopefully it's on snapshots.d.o
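                                # snapshot.debian.org's machine-readable API: the
                                # /mr/package/<name>/<version>/allfiles endpoint lists every
                                # file built from that exact source version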
                                snapshot_url = "%s/mr/package/%s/%s/allfiles" % (self.snapshot_url, package, version)
                                snapshot_response = requests.get(snapshot_url)
                                if snapshot_response.status_code == 404:
                                    print('Package not in snapshots either, removing for now and will try again next time')
                                    raise IntegrityError("No packages for this advisory yet")
                                snapshot_data = snapshot_response.json()
                                if snapshot_data['version'] != version:
                                    raise Exception("snapshots.d.o returned non-matching result")

                                for snapshot_binary in snapshot_data['result']['binaries']:
                                    snapshot_binary_architectures = [file['architecture'] for file in snapshot_binary['files'] if file['architecture'] in self.architectures]
                                    for architecture in snapshot_binary_architectures:
                                        db_binpackage = BinaryPackage(source_package=db_srcpackage, advisory=db_advisory, package=snapshot_binary['name'], release=release, safe_version=snapshot_binary['version'], architecture=architecture)
                                        db_binpackage.save()
                                        search_packages.add(snapshot_binary['name'])
                                        search_packages.add(snapshot_binary['version'])

                            db_advisory.search_keywords = " ".join(search_packages)
                            db_advisory.save()

                            print("OK")
                        except KeyboardInterrupt:
                            raise
                        except IntegrityError:
                            # propagate so the atomic block rolls this advisory back and it
                            # is retried on the next run, as the message above promises
                            raise
                        except Exception:
                            print("could not get binary packages for %s/%s, assuming there are none" % (release, package))



class UbuntuFeed(object):
    """
    Syncs the latest additions to the USN JSON file in to the local database.
    """

    def __init__(self, usn_url=None, cache_location=None, releases=None, architectures=None):
        self.usn_url = usn_url or 'https://usn.ubuntu.com/usn-db/database.json.bz2'
        self.cache_location = cache_location or '%s/advisory_cache/usn' % settings.BASE_DIR
        self.releases = releases or (
            'trusty',
            'xenial',
        )
        self.architectures = architectures or (
            'i386',
            'amd64',
            'all',
        )

    def _update_json_advisories(self):
        """
        Download and decompress the latest USN data from Ubuntu.
        """
        # create the cache directory if it doesn't already exist
        os.makedirs(self.cache_location, exist_ok=True)

        # the USN list is a bzip'd JSON file of all the current advisories for all supported releases
        response = requests.get(self.usn_url, stream=True)
        bytes_downloaded = 0
        with open("%s/incoming-database.json.bz2" % self.cache_location, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    f.flush()
                    bytes_downloaded += len(chunk)

        if bytes_downloaded < 1500: # sanity check: the real compressed database is far larger than this
            raise Exception("could not download USN feed")
        else:
            try:
                # un-bzip the file using the bz2 library and atomically replace the existing one if this succeeds
                with open("%s/incoming-database.json" % self.cache_location, 'wb') as decompressed, bz2.BZ2File("%s/incoming-database.json.bz2" % self.cache_location, 'rb') as compressed:
                    for data in iter(lambda: compressed.read(100 * 1024), b''):
                        decompressed.write(data)
                os.rename("%s/incoming-database.json" % self.cache_location, "%s/database.json" % self.cache_location)
            except Exception:
                raise Exception("could not decompress USN feed")

    def _parse_json_advisories(self):
        """
        Produce a dictionary representing USN data from the cache file.
        """

        with open("%s/database.json" % self.cache_location) as usn_list_file:
            return json.loads(usn_list_file.read())

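    # the whole sync runs in a single transaction: either every new USN lands or none do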
    @transaction.atomic
    def update_local_database(self):
        """
        Retrieve the latest JSON data, parse it and add any new advisories to the local database.
        """
        print("  Downloading JSON data...")
        self._update_json_advisories()
        json_advisories = self._parse_json_advisories()
        new_advisories = set(json_advisories) - {'-'.join(advisory.upstream_id.split('-')[1:]) for advisory in Advisory.objects.filter(source='ubuntu')}

        print("  Found %i new USNs to process" % len(new_advisories))


        for advisory in new_advisories:
            print("    Processing USN %s... " % advisory, end='')

            search_packages = set()

            try:
                advisory_data = json_advisories[advisory]
                db_advisory = Advisory(
                    upstream_id="USN-%s" % advisory,
                    source="ubuntu",
                    issued=datetime.utcfromtimestamp(advisory_data['timestamp']).replace(tzinfo=pytz.utc),
                    description=advisory_data.get('description', None),
                    action=advisory_data.get('action', None),
                    short_description=advisory_data.get('isummary', None)
                )
                db_advisory.save()

                for cve in advisory_data.get('cves', []):
                    # as above, get_or_create persists the row and the M2M add saves immediately
                    db_vulnerability, _ = Vulnerability.objects.get_or_create(upstream_id=cve.upper(), defaults={'first_seen': db_advisory.issued})
                    db_vulnerability.advisories.add(db_advisory)

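                # each release entry carries "sources", "binaries" and a per-architecture
                # "archs" map of download URLs; those are the shapes consumed below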
                for release, release_data in json_advisories[advisory]['releases'].items():
                    if release not in self.releases: # skip releases we don't track
                        continue

                    # Source packages
                    for src_package, src_package_data in release_data['sources'].items():
                        db_srcpackage = SourcePackage(advisory=db_advisory, package=src_package, release=release, safe_version=src_package_data['version'])
                        db_srcpackage.save()
                        search_packages.add(src_package)
                        search_packages.add(src_package_data['version'])

                    # Binary packages
                    for bin_package, bin_package_data in release_data['binaries'].items():
                        bin_package_version = bin_package_data['version']

                        # check each architecture's download URLs to see whether this binary
                        # package was built for it
                        for architecture in [arch for arch in release_data.get('archs', {}).keys() if arch in self.architectures]:
                            for url in release_data['archs'][architecture]['urls'].keys():

                                # .deb filenames look like name_version_arch.deb, so the first
                                # underscore-delimited token is the package name; a match means
                                # this binary package was built for the architecture
                                if bin_package == url.split('/')[-1].split('_')[0]:
                                    db_binpackage = BinaryPackage(advisory=db_advisory, package=bin_package, release=release, safe_version=bin_package_version, architecture=architecture)
                                    db_binpackage.save()
                                    search_packages.add(bin_package)
                                    search_packages.add(bin_package_version)

                db_advisory.search_keywords = " ".join(search_packages)
                db_advisory.save()
            except Exception:
                print("Error")
                raise
            else:
                print("OK")


class Command(BaseCommand):
    help = 'Update all sources of advisories'

    def handle(self, *args, **options):
        self.stdout.write(self.style.MIGRATE_HEADING("Updating DSAs..."))
        feed = DebianFeed()
        feed.update_local_database()

        self.stdout.write(self.style.MIGRATE_HEADING("Updating DLAs..."))
        feed = DebianFeed(cache_location='%s/advisory_cache/dla' % settings.BASE_DIR, list_location='data/DLA/list')
        feed.update_local_database()

        self.stdout.write(self.style.MIGRATE_HEADING("Updating USNs..."))
        feed = UbuntuFeed()
        feed.update_local_database()

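        # record when this run finished; presumably read elsewhere to show data freshness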
        with open("%s/advisory_cache/timestamp" % settings.BASE_DIR, 'w') as timestamp:
            timestamp.write(str(int(time.time())))