I am trying to create a beam pipeline to apply multiple ParDo Transforms at the same time on one PCollection and collect and print all results in a list. So far I've experiencing Sequential process, Like first ParDo then second ParDo after that. Here's an example I have prepared for my issue:
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
p = beam.Pipeline(options=PipelineOptions())
class Tr1(beam.DoFn):
def process(self, number):
number = number + 1
yield number
class Tr2(beam.DoFn):
def process(self, number):
number = number + 2
yield number
def pipeline_test():
numbers = p | "Create" >> beam.Create([1])
tr1 = numbers | "Tr1" >> beam.ParDo(Tr1())
tr2 = numbers | "Tr2" >> beam.ParDo(Tr2())
tr1 | "Print1" >> beam.Map(print)
tr2 | "Print2" >> beam.Map(print)
def main(argv):
del argv
pipeline_test()
result = p.run()
result.wait_until_finish()
if __name__ == '__main__':
app.run(main)