-
-
Notifications
You must be signed in to change notification settings - Fork 191
Expand file tree
/
Copy pathdeploy_to_develop.py
More file actions
540 lines (461 loc) · 20.9 KB
/
deploy_to_develop.py
File metadata and controls
540 lines (461 loc) · 20.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
from aboutcode.pipeline import optional_step
from scanpipe import pipes
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import d2d
from scanpipe.pipes import d2d_config
from scanpipe.pipes import flag
from scanpipe.pipes import input
from scanpipe.pipes import jvm
from scanpipe.pipes import matchcode
from scanpipe.pipes import purldb
from scanpipe.pipes import scancode
class DeployToDevelop(Pipeline):
    """
    Establish relationships between two code trees: deployment and development.

    This pipeline requires a minimum of two archive files, each properly tagged with:

    - **from** for archives containing the development source code.
    - **to** for archives containing the deployment compiled code.

    When using download URLs as inputs, the "from" and "to" tags can be
    provided by adding a "#from" or "#to" fragment at the end of the download URLs.

    When uploading local files:

    - **User Interface:** Use the "Edit flag" link in the "Inputs" panel of the Project
      details view.
    - **REST API:** Utilize the "upload_file_tag" field in addition to the
      "upload_file".
    - **Command Line Interface:** Tag uploaded files using the "filename:tag" syntax,
      for example, ``--input-file path/filename:tag``.
    """

    @classmethod
    def steps(cls):
        return (
            cls.get_inputs,
            cls.extract_inputs_to_codebase_directory,
            cls.extract_archives,
            cls.collect_and_create_codebase_resources,
            cls.fingerprint_codebase_directories,
            cls.flag_empty_files,
            cls.flag_whitespace_files,
            cls.flag_ignored_resources,
            cls.load_ecosystem_config,
            cls.map_ruby,
            cls.map_about_files,
            cls.map_checksum,
            cls.match_archives_to_purldb,
            cls.find_java_packages,
            cls.map_java_to_class,
            cls.map_jar_to_java_source,
            cls.find_scala_packages,
            cls.map_scala_to_class,
            cls.map_jar_to_scala_source,
            cls.find_kotlin_packages,
            cls.map_kotlin_to_class,
            cls.map_jar_to_kotlin_source,
            cls.find_grammar_packages,
            cls.map_grammar_to_class,
            cls.map_jar_to_grammar_source,
            cls.find_groovy_packages,
            cls.map_groovy_to_class,
            cls.map_jar_to_groovy_source,
            cls.find_aspectj_packages,
            cls.map_aspectj_to_class,
            cls.map_jar_to_aspectj_source,
            cls.find_clojure_packages,
            cls.map_clojure_to_class,
            cls.map_jar_to_clojure_source,
            cls.find_xtend_packages,
            cls.map_xtend_to_class,
            cls.map_javascript,
            cls.map_javascript_symbols,
            cls.map_javascript_strings,
            cls.get_symbols_from_binaries,
            cls.map_elf,
            cls.map_macho,
            cls.map_winpe,
            cls.map_go,
            cls.map_rust,
            cls.map_python,
            cls.match_directories_to_purldb,
            cls.match_resources_to_purldb,
            cls.map_javascript_post_purldb_match,
            cls.map_javascript_path,
            cls.map_javascript_colocation,
            cls.map_thirdparty_npm_packages,
            cls.map_path,
            cls.flag_mapped_resources_archives_and_ignored_directories,
            cls.perform_house_keeping_tasks,
            cls.match_purldb_resources_post_process,
            cls.remove_packages_without_resources,
            cls.scan_ignored_to_files,
            cls.scan_unmapped_to_files,
            cls.scan_mapped_from_for_files,
            cls.collect_and_create_license_detections,
            cls.flag_deployed_from_resources_with_missing_license,
            cls.create_local_files_packages,
        )

    def get_inputs(self):
        """Locate the ``from`` and ``to`` input files."""
        self.from_files, self.to_files = d2d.get_inputs(self.project)

    def extract_inputs_to_codebase_directory(self):
        """Extract input files to the project's codebase/ directory."""
        codebase_path = self.project.codebase_path

        # Every ``from`` input lands in the single FROM directory.
        from_path = codebase_path / d2d.FROM
        for input_file_path in self.from_files:
            if input.is_archive(input_file_path):
                self.extract_archive(input_file_path, from_path)
            else:
                input.copy_input(input_file_path, from_path)

        # With several ``to`` inputs, each one gets its own numbered subdirectory
        # under TO; a single ``to`` input goes directly into TO.
        to_root = codebase_path / d2d.TO
        use_subdirectories = len(self.to_files) > 1
        self.to_paths = []
        for index, input_file_path in enumerate(self.to_files):
            to_subpath = to_root / str(index) if use_subdirectories else to_root
            # Stored relative to the codebase root with a trailing slash; these
            # values are used as path prefixes in ``run_d2d_step``.
            self.to_paths.append(str(to_subpath.relative_to(codebase_path)) + "/")

            if input.is_archive(input_file_path):
                self.extract_archive(input_file_path, to_subpath)
            else:
                input.copy_input(input_file_path, to_subpath)

        # Reload the project env post-extraction as the scancode-config.yml file
        # may be located in one of the extracted archives.
        self.env = self.project.get_env()

    def run_d2d_step(self, func, *args, **kwargs):
        """
        Run the d2d mapping function ``func`` once for each ``to`` path.

        For every entry of ``self.to_paths``, ``func`` receives a ``to_queryset``
        limited to the codebase resources whose path starts with that entry, plus
        the entry itself as ``to_path``. When ``to_paths`` has not been set on
        this pipeline instance, ``func`` is called a single time without these two
        arguments. ``project`` and ``logger`` are always provided, and any extra
        ``args`` and ``kwargs`` are forwarded unchanged.
        """
        to_paths = getattr(self, "to_paths", [None])
        log_each_path = len(to_paths) > 1

        for to_path in to_paths:
            # Use a fresh copy of the keyword arguments on each run so that the
            # scoping values of one ``to`` path cannot leak into another call.
            step_kwargs = dict(kwargs)
            if to_path:
                step_kwargs["to_queryset"] = self.project.codebaseresources.filter(
                    path__startswith=to_path
                )
                step_kwargs["to_path"] = to_path
                if log_each_path:
                    self.log(f"Running {func.__name__} for {to_path}")

            func(*args, project=self.project, logger=self.log, **step_kwargs)

    def collect_and_create_codebase_resources(self):
        """Collect and create codebase resources."""
        pipes.collect_and_create_codebase_resources(self.project)

    def fingerprint_codebase_directories(self):
        """Compute directory fingerprints for matching."""
        matchcode.fingerprint_codebase_directories(self.project, to_codebase_only=True)

    def flag_whitespace_files(self):
        """Flag whitespace files of 100 bytes or less as ignored."""
        d2d.flag_whitespace_files(project=self.project)

    def load_ecosystem_config(self):
        """Load ecosystem specific configurations for d2d steps for selected options."""
        d2d_config.load_ecosystem_config(pipeline=self, options=self.selected_groups)

    @optional_step("Ruby")
    def map_ruby(self):
        """Load Ruby specific configurations for d2d steps."""

    def map_about_files(self):
        """Map ``from/`` .ABOUT files to their related ``to/`` resources."""
        self.run_d2d_step(d2d.map_about_files)

    def map_checksum(self):
        """Map using SHA1 checksum."""
        self.run_d2d_step(d2d.map_checksum, checksum_field="sha1")

    def match_archives_to_purldb(self):
        """Match selected package archives by extension to PurlDB."""
        if not purldb.is_available():
            self.log("PurlDB is not available. Skipping.")
            return

        self.run_d2d_step(
            d2d.match_purldb_resources,
            extensions=self.ecosystem_config.matchable_package_extensions,
            matcher_func=d2d.match_purldb_package,
        )

    @optional_step("Java")
    def find_java_packages(self):
        """Find the java package of the .java source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.JavaLanguage, logger=self.log
        )

    @optional_step("Java")
    def map_java_to_class(self):
        """Map a .class compiled file to its .java source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.JavaLanguage)

    @optional_step("Java")
    def map_jar_to_java_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.JavaLanguage)

    @optional_step("Scala")
    def find_scala_packages(self):
        """Find the java package of the .scala source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.ScalaLanguage, logger=self.log
        )

    @optional_step("Scala")
    def map_scala_to_class(self):
        """Map a .class compiled file to its .scala source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.ScalaLanguage)

    @optional_step("Scala")
    def map_jar_to_scala_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.ScalaLanguage)

    @optional_step("Kotlin")
    def find_kotlin_packages(self):
        """Find the java package of the kotlin source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.KotlinLanguage, logger=self.log
        )

    @optional_step("Kotlin")
    def map_kotlin_to_class(self):
        """Map a .class compiled file to its kotlin source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.KotlinLanguage)

    @optional_step("Kotlin")
    def map_jar_to_kotlin_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.KotlinLanguage)

    @optional_step("Grammar")
    def find_grammar_packages(self):
        """Find the java package of the .g/.g4 source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.GrammarLanguage, logger=self.log
        )

    @optional_step("Grammar")
    def map_grammar_to_class(self):
        """Map a .class compiled file to its .g/.g4 source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.GrammarLanguage)

    @optional_step("Grammar")
    def map_jar_to_grammar_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.GrammarLanguage)

    @optional_step("Groovy")
    def find_groovy_packages(self):
        """Find the package of the .groovy source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.GroovyLanguage, logger=self.log
        )

    @optional_step("Groovy")
    def map_groovy_to_class(self):
        """Map a .class compiled file to its .groovy source."""
        # Routed through run_d2d_step like the other JVM languages so the mapping
        # is run for each ``to`` path.
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.GroovyLanguage)

    @optional_step("Groovy")
    def map_jar_to_groovy_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.GroovyLanguage)

    @optional_step("AspectJ")
    def find_aspectj_packages(self):
        """Find the package of the .aj source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.AspectJLanguage, logger=self.log
        )

    @optional_step("AspectJ")
    def map_aspectj_to_class(self):
        """Map a .class compiled file to its .aj source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.AspectJLanguage)

    @optional_step("AspectJ")
    def map_jar_to_aspectj_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.AspectJLanguage)

    @optional_step("Clojure")
    def find_clojure_packages(self):
        """Find the package of the .clj source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.ClojureLanguage, logger=self.log
        )

    @optional_step("Clojure")
    def map_clojure_to_class(self):
        """Map a .class compiled file to its .clj source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.ClojureLanguage)

    @optional_step("Clojure")
    def map_jar_to_clojure_source(self):
        """Map .jar files to their related source directory."""
        self.run_d2d_step(d2d.map_jar_to_jvm_source, jvm_lang=jvm.ClojureLanguage)

    @optional_step("Xtend")
    def find_xtend_packages(self):
        """Find the java package of the xtend source files."""
        d2d.find_jvm_packages(
            project=self.project, jvm_lang=jvm.XtendLanguage, logger=self.log
        )

    @optional_step("Xtend")
    def map_xtend_to_class(self):
        """Map a .class compiled file to its xtend source."""
        self.run_d2d_step(d2d.map_jvm_to_class, jvm_lang=jvm.XtendLanguage)

    @optional_step("JavaScript")
    def map_javascript(self):
        """
        Map a packed or minified JavaScript, TypeScript, CSS and SCSS
        to its source.
        """
        self.run_d2d_step(d2d.map_javascript)

    @optional_step("JavaScript")
    def map_javascript_symbols(self):
        """Map deployed JavaScript, TypeScript to its sources using symbols."""
        self.run_d2d_step(d2d.map_javascript_symbols)

    @optional_step("JavaScript")
    def map_javascript_strings(self):
        """Map deployed JavaScript, TypeScript to its sources using string literals."""
        self.run_d2d_step(d2d.map_javascript_strings)

    def get_symbols_from_binaries(self):
        """Extract symbols from ELF, Mach-O and Windows binaries for mapping."""
        self.run_d2d_step(d2d.extract_binary_symbols, options=self.selected_groups)

    @optional_step("Elf")
    def map_elf(self):
        """Map ELF binaries to their sources using dwarf paths and symbols."""
        self.run_d2d_step(d2d.map_elfs_with_dwarf_paths)
        self.run_d2d_step(d2d.map_elfs_binaries_with_symbols)

    @optional_step("MacOS")
    def map_macho(self):
        """Map Mach-O binaries to their sources using symbols."""
        self.run_d2d_step(d2d.map_macho_binaries_with_symbols)

    @optional_step("Windows")
    def map_winpe(self):
        """Map winpe binaries to their sources using symbols."""
        self.run_d2d_step(d2d.map_winpe_binaries_with_symbols)

    @optional_step("Go")
    def map_go(self):
        """Map Go binaries to their sources using paths and symbols."""
        self.run_d2d_step(d2d.map_go_paths)
        self.run_d2d_step(d2d.map_go_binaries_with_symbols)

    @optional_step("Rust")
    def map_rust(self):
        """Map Rust binaries to their sources using symbols."""
        self.run_d2d_step(d2d.map_rust_binaries_with_symbols)

    @optional_step("Python")
    def map_python(self):
        """
        Map binaries from Python packages to their sources using dwarf paths and
        symbols.
        """
        self.run_d2d_step(d2d.map_python_pyx_to_binaries)
        self.run_d2d_step(d2d.map_python_protobuf_files)

    def match_directories_to_purldb(self):
        """Match selected directories in PurlDB."""
        if not purldb.is_available():
            self.log("PurlDB is not available. Skipping.")
            return

        self.run_d2d_step(d2d.match_purldb_directories)

    def match_resources_to_purldb(self):
        """Match selected files by extension in PurlDB."""
        if not purldb.is_available():
            self.log("PurlDB is not available. Skipping.")
            return

        self.run_d2d_step(
            d2d.match_purldb_resources,
            extensions=self.ecosystem_config.matchable_resource_extensions,
            matcher_func=d2d.match_purldb_resource,
        )

    @optional_step("JavaScript")
    def map_javascript_post_purldb_match(self):
        """Map minified javascript file based on existing PurlDB match."""
        self.run_d2d_step(d2d.map_javascript_post_purldb_match)

    @optional_step("JavaScript")
    def map_javascript_path(self):
        """Map javascript file based on path."""
        self.run_d2d_step(d2d.map_javascript_path)

    @optional_step("JavaScript")
    def map_javascript_colocation(self):
        """Map JavaScript files based on neighborhood file mapping."""
        self.run_d2d_step(d2d.map_javascript_colocation)

    @optional_step("JavaScript")
    def map_thirdparty_npm_packages(self):
        """Map thirdparty package using package.json metadata."""
        self.run_d2d_step(d2d.map_thirdparty_npm_packages)

    def map_path(self):
        """Map using path similarities."""
        self.run_d2d_step(d2d.map_path)

    def flag_mapped_resources_archives_and_ignored_directories(self):
        """Flag all codebase resources that were mapped during the pipeline."""
        flag.flag_mapped_resources(self.project)
        flag.flag_ignored_directories(self.project)
        self.run_d2d_step(d2d.flag_processed_archives)

    def perform_house_keeping_tasks(self):
        """
        On deployed side
            - Ignore specific files based on ecosystem based configurations.
            - PurlDB match files with ``no-java-source`` and empty status,
              if no match is found update status to ``requires-review``.
            - Update status for uninteresting files.
            - Flag the dangling legal files for review.

        On devel side
            - Update status for not deployed files.
        """
        self.run_d2d_step(d2d.match_resources_with_no_java_source)
        self.run_d2d_step(d2d.handle_dangling_deployed_legal_files)
        self.run_d2d_step(
            d2d.ignore_unmapped_resources_from_config,
            patterns_to_ignore=self.ecosystem_config.deployed_resource_path_exclusions,
        )
        self.run_d2d_step(
            d2d.match_unmapped_resources,
            matched_extensions=self.ecosystem_config.matchable_resource_extensions,
        )
        # Called once for the whole project rather than per ``to`` path.
        d2d.flag_undeployed_resources(project=self.project)

    def match_purldb_resources_post_process(self):
        """Choose the best package for PurlDB matched resources."""
        d2d.match_purldb_resources_post_process(self.project, logger=self.log)

    def remove_packages_without_resources(self):
        """Remove packages without any resources."""
        package_without_resources = self.project.discoveredpackages.filter(
            codebase_resources__isnull=True
        )
        package_without_resources.delete()

    def scan_ignored_to_files(self):
        """
        Scan status="ignored-from-config" ``to/`` files for copyrights,
        licenses, emails, and urls. These files are ignored based on
        ecosystem specific configurations. These files are not used for the
        D2D purpose, but scanning them may provide useful information about
        the deployed codebase.
        """
        d2d.scan_ignored_to_files(project=self.project, logger=self.log)

    def scan_unmapped_to_files(self):
        """
        Scan unmapped/matched ``to/`` files for copyrights, licenses,
        emails, and urls and update the status to `requires-review`.
        """
        d2d.scan_unmapped_to_files(project=self.project, logger=self.log)

    def scan_mapped_from_for_files(self):
        """Scan mapped ``from/`` files for copyrights, licenses, emails, and urls."""
        scan_files = d2d.get_from_files_for_scanning(self.project.codebaseresources)
        scancode.scan_for_files(self.project, scan_files, progress_logger=self.log)

    def collect_and_create_license_detections(self):
        """
        Collect and create unique license detections from resources and
        package data.
        """
        scancode.collect_and_create_license_detections(project=self.project)

    def create_local_files_packages(self):
        """Create local-files packages for codebase resources not part of a package."""
        d2d.create_local_files_packages(self.project)

    def flag_deployed_from_resources_with_missing_license(self):
        """Update the status for deployed from files with missing license."""
        d2d.flag_deployed_from_resources_with_missing_license(
            self.project,
            doc_extensions=self.ecosystem_config.doc_extensions,
        )